85ad10e62b69eac1f25ee1f855293f3f30058144
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 /**
55  * @file
56  * @brief IPv4 Forwarding.
57  *
58  * This file contains the source code for IPv4 forwarding.
59  */
60
/*
 * Forward declaration of the packet-trace helper. The node functions in
 * this file call it with VLIB_TX when VLIB_NODE_FLAG_TRACE is set on the
 * node; the definition is not in this part of the file.
 * NOTE(review): which_adj_index presumably selects which
 * ip.adj_index[] slot gets recorded in the trace -- confirm against the
 * definition.
 */
void
ip4_forward_next_trace (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame,
                        vlib_rx_or_tx_t which_adj_index);
66
67 always_inline uword
68 ip4_lookup_inline (vlib_main_t * vm,
69                    vlib_node_runtime_t * node,
70                    vlib_frame_t * frame,
71                    int lookup_for_responses_to_locally_received_packets)
72 {
73   ip4_main_t * im = &ip4_main;
74   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75   u32 n_left_from, n_left_to_next, * from, * to_next;
76   ip_lookup_next_t next;
77   u32 cpu_index = os_get_cpu_number();
78
79   from = vlib_frame_vector_args (frame);
80   n_left_from = frame->n_vectors;
81   next = node->cached_next_index;
82
83   while (n_left_from > 0)
84     {
85       vlib_get_next_frame (vm, node, next,
86                            to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t * p0, * p1, * p2, * p3;
91           ip4_header_t * ip0, * ip1, * ip2, * ip3;
92           __attribute__((unused)) tcp_header_t * tcp0, * tcp1, * tcp2, * tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t * lb0, * lb1, * lb2, * lb3;
95           ip4_fib_mtrie_t * mtrie0, * mtrie1, * mtrie2, * mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t * dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100           __attribute__((unused)) u32 pi2, fib_index2, lb_index2, is_tcp_udp2;
101           __attribute__((unused)) u32 pi3, fib_index3, lb_index3, is_tcp_udp3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t * p4, * p5, * p6, * p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
153           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
154           fib_index2 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p3)->sw_if_index[VLIB_RX]);
156           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
157             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
158           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
159             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
160           fib_index2 = (vnet_buffer(p2)->sw_if_index[VLIB_TX] == (u32)~0) ?
161             fib_index2 : vnet_buffer(p2)->sw_if_index[VLIB_TX];
162           fib_index3 = (vnet_buffer(p3)->sw_if_index[VLIB_TX] == (u32)~0) ?
163             fib_index3 : vnet_buffer(p3)->sw_if_index[VLIB_TX];
164
165
166           if (! lookup_for_responses_to_locally_received_packets)
167             {
168               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
169               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
170               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
171               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
172
173               leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
174
175               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
176               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
177               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
178               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
179             }
180
181           tcp0 = (void *) (ip0 + 1);
182           tcp1 = (void *) (ip1 + 1);
183           tcp2 = (void *) (ip2 + 1);
184           tcp3 = (void *) (ip3 + 1);
185
186           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
187                          || ip0->protocol == IP_PROTOCOL_UDP);
188           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
189                          || ip1->protocol == IP_PROTOCOL_UDP);
190           is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
191                          || ip2->protocol == IP_PROTOCOL_UDP);
192           is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
193                          || ip1->protocol == IP_PROTOCOL_UDP);
194
195           if (! lookup_for_responses_to_locally_received_packets)
196             {
197               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
198               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
199               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
200               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
201             }
202
203           if (! lookup_for_responses_to_locally_received_packets)
204             {
205               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
206               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
207               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
208               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
209             }
210
211           if (! lookup_for_responses_to_locally_received_packets)
212             {
213               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
214               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
215               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
216               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
217             }
218
219           if (lookup_for_responses_to_locally_received_packets)
220             {
221               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
222               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
223               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
224               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
225             }
226           else
227             {
228               /* Handle default route. */
229               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
230               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
231               leaf2 = (leaf2 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
232               leaf3 = (leaf3 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
233               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
234               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
235               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
236               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
237             }
238
239           lb0 = load_balance_get (lb_index0);
240           lb1 = load_balance_get (lb_index1);
241           lb2 = load_balance_get (lb_index2);
242           lb3 = load_balance_get (lb_index3);
243
244           /* Use flow hash to compute multipath adjacency. */
245           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
246           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
247           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
248           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
249           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
250             {
251               flow_hash_config0 = lb0->lb_hash_config;
252               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
253                 ip4_compute_flow_hash (ip0, flow_hash_config0);
254             }
255           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
256             {
257               flow_hash_config1 = lb1->lb_hash_config;
258               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
259                 ip4_compute_flow_hash (ip1, flow_hash_config1);
260             }
261           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
262             {
263               flow_hash_config2 = lb2->lb_hash_config;
264               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
265                 ip4_compute_flow_hash (ip2, flow_hash_config2);
266             }
267           if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
268             {
269               flow_hash_config3 = lb3->lb_hash_config;
270               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
271                 ip4_compute_flow_hash (ip3, flow_hash_config3);
272             }
273
274           ASSERT (lb0->lb_n_buckets > 0);
275           ASSERT (is_pow2 (lb0->lb_n_buckets));
276           ASSERT (lb1->lb_n_buckets > 0);
277           ASSERT (is_pow2 (lb1->lb_n_buckets));
278           ASSERT (lb2->lb_n_buckets > 0);
279           ASSERT (is_pow2 (lb2->lb_n_buckets));
280           ASSERT (lb3->lb_n_buckets > 0);
281           ASSERT (is_pow2 (lb3->lb_n_buckets));
282
283           dpo0 = load_balance_get_bucket_i(lb0,
284                                            (hash_c0 &
285                                             (lb0->lb_n_buckets_minus_1)));
286           dpo1 = load_balance_get_bucket_i(lb1,
287                                            (hash_c1 &
288                                             (lb1->lb_n_buckets_minus_1)));
289           dpo2 = load_balance_get_bucket_i(lb2,
290                                            (hash_c2 &
291                                             (lb2->lb_n_buckets_minus_1)));
292           dpo3 = load_balance_get_bucket_i(lb3,
293                                            (hash_c3 &
294                                             (lb3->lb_n_buckets_minus_1)));
295
296           next0 = dpo0->dpoi_next_node;
297           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
298           next1 = dpo1->dpoi_next_node;
299           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
300           next2 = dpo2->dpoi_next_node;
301           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
302           next3 = dpo3->dpoi_next_node;
303           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
304
305           vlib_increment_combined_counter
306               (cm, cpu_index, lb_index0, 1,
307                vlib_buffer_length_in_chain (vm, p0)
308                + sizeof(ethernet_header_t));
309           vlib_increment_combined_counter
310               (cm, cpu_index, lb_index1, 1,
311                vlib_buffer_length_in_chain (vm, p1)
312                + sizeof(ethernet_header_t));
313           vlib_increment_combined_counter
314               (cm, cpu_index, lb_index2, 1,
315                vlib_buffer_length_in_chain (vm, p2)
316                + sizeof(ethernet_header_t));
317           vlib_increment_combined_counter
318               (cm, cpu_index, lb_index3, 1,
319                vlib_buffer_length_in_chain (vm, p3)
320                + sizeof(ethernet_header_t));
321
322           vlib_validate_buffer_enqueue_x4 (vm, node, next,
323                                            to_next, n_left_to_next,
324                                            pi0, pi1, pi2, pi3,
325                                            next0, next1, next2, next3);
326         }
327
328       while (n_left_from > 0 && n_left_to_next > 0)
329         {
330           vlib_buffer_t * p0;
331           ip4_header_t * ip0;
332           __attribute__((unused)) tcp_header_t * tcp0;
333           ip_lookup_next_t next0;
334           const load_balance_t *lb0;
335           ip4_fib_mtrie_t * mtrie0;
336           ip4_fib_mtrie_leaf_t leaf0;
337           ip4_address_t * dst_addr0;
338           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
339           flow_hash_config_t flow_hash_config0;
340           const dpo_id_t *dpo0;
341           u32 hash_c0;
342
343           pi0 = from[0];
344           to_next[0] = pi0;
345
346           p0 = vlib_get_buffer (vm, pi0);
347
348           ip0 = vlib_buffer_get_current (p0);
349
350           dst_addr0 = &ip0->dst_address;
351
352           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
354             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
355
356           if (! lookup_for_responses_to_locally_received_packets)
357             {
358               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
359
360               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
361
362               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
363             }
364
365           tcp0 = (void *) (ip0 + 1);
366
367           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
368                          || ip0->protocol == IP_PROTOCOL_UDP);
369
370           if (! lookup_for_responses_to_locally_received_packets)
371             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
372
373           if (! lookup_for_responses_to_locally_received_packets)
374             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
375
376           if (! lookup_for_responses_to_locally_received_packets)
377             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
378
379           if (lookup_for_responses_to_locally_received_packets)
380             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
381           else
382             {
383               /* Handle default route. */
384               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
385               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
386             }
387
388           lb0 = load_balance_get (lbi0);
389
390           /* Use flow hash to compute multipath adjacency. */
391           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
392           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
393             {
394               flow_hash_config0 = lb0->lb_hash_config;
395
396               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
397                 ip4_compute_flow_hash (ip0, flow_hash_config0);
398             }
399
400           ASSERT (lb0->lb_n_buckets > 0);
401           ASSERT (is_pow2 (lb0->lb_n_buckets));
402
403           dpo0 = load_balance_get_bucket_i(lb0,
404                                            (hash_c0 &
405                                             (lb0->lb_n_buckets_minus_1)));
406
407           next0 = dpo0->dpoi_next_node;
408           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
409
410           vlib_increment_combined_counter
411               (cm, cpu_index, lbi0, 1,
412                vlib_buffer_length_in_chain (vm, p0));
413
414           from += 1;
415           to_next += 1;
416           n_left_to_next -= 1;
417           n_left_from -= 1;
418
419           if (PREDICT_FALSE (next0 != next))
420             {
421               n_left_to_next += 1;
422               vlib_put_next_frame (vm, node, next, n_left_to_next);
423               next = next0;
424               vlib_get_next_frame (vm, node, next,
425                                    to_next, n_left_to_next);
426               to_next[0] = pi0;
427               to_next += 1;
428               n_left_to_next -= 1;
429             }
430         }
431
432       vlib_put_next_frame (vm, node, next, n_left_to_next);
433     }
434
435   if (node->flags & VLIB_NODE_FLAG_TRACE)
436     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
437
438   return frame->n_vectors;
439 }
440
441 /** @brief IPv4 lookup node.
442     @node ip4-lookup
443
444     This is the main IPv4 lookup dispatch node.
445
446     @param vm vlib_main_t corresponding to the current thread
447     @param node vlib_node_runtime_t
448     @param frame vlib_frame_t whose contents should be dispatched
449
450     @par Graph mechanics: buffer metadata, next index usage
451
452     @em Uses:
453     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
454         - Indicates the @c sw_if_index value of the interface that the
455           packet was received on.
456     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
457         - When the value is @c ~0 then the node performs a longest prefix
458           match (LPM) for the packet destination address in the FIB attached
459           to the receive interface.
460         - Otherwise perform LPM for the packet destination address in the
461           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
462           value (0, 1, ...) and not a VRF id.
463
464     @em Sets:
465     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
466         - The lookup result adjacency index.
467
468     <em>Next Index:</em>
469     - Dispatches the packet to the node index found in
470       ip_adjacency_t @c adj->lookup_next_index
471       (where @c adj is the lookup result adjacency).
472 */
473 static uword
474 ip4_lookup (vlib_main_t * vm,
475             vlib_node_runtime_t * node,
476             vlib_frame_t * frame)
477 {
478   return ip4_lookup_inline (vm, node, frame,
479                             /* lookup_for_responses_to_locally_received_packets */ 0);
480
481 }
482
/* Trace formatter used by the node below; only declared here, the
   definition is not in this part of the file. */
static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);

/*
 * Registration of the "ip4-lookup" graph node: dispatch function
 * ip4_lookup, frames of u32 elements (used as buffer indices by the
 * dispatch function), and the next-node table supplied by
 * IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries.
 */
VLIB_REGISTER_NODE (ip4_lookup_node) = {
  .function = ip4_lookup,
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_lookup_trace,
  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};

/* NOTE(review): presumably emits per-CPU-architecture variants of
   ip4_lookup for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
496
497 always_inline uword
498 ip4_load_balance (vlib_main_t * vm,
499                   vlib_node_runtime_t * node,
500                   vlib_frame_t * frame)
501 {
502   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
503   u32 n_left_from, n_left_to_next, * from, * to_next;
504   ip_lookup_next_t next;
505   u32 cpu_index = os_get_cpu_number();
506
507   from = vlib_frame_vector_args (frame);
508   n_left_from = frame->n_vectors;
509   next = node->cached_next_index;
510
511   if (node->flags & VLIB_NODE_FLAG_TRACE)
512       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
513
514   while (n_left_from > 0)
515     {
516       vlib_get_next_frame (vm, node, next,
517                            to_next, n_left_to_next);
518
519
520       while (n_left_from > 0 && n_left_to_next > 0)
521         {
522           ip_lookup_next_t next0;
523           const load_balance_t *lb0;
524           vlib_buffer_t * p0;
525           u32 pi0, lbi0, hc0;
526           const ip4_header_t *ip0;
527           const dpo_id_t *dpo0;
528
529           pi0 = from[0];
530           to_next[0] = pi0;
531
532           p0 = vlib_get_buffer (vm, pi0);
533
534           ip0 = vlib_buffer_get_current (p0);
535           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
536
537           lb0 = load_balance_get(lbi0);
538           hc0 = lb0->lb_hash_config;
539           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
540
541           dpo0 = load_balance_get_bucket_i(lb0,
542                                            vnet_buffer(p0)->ip.flow_hash &
543                                            (lb0->lb_n_buckets_minus_1));
544
545           next0 = dpo0->dpoi_next_node;
546           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
547
548           vlib_increment_combined_counter
549               (cm, cpu_index, lbi0, 1,
550                vlib_buffer_length_in_chain (vm, p0));
551
552           from += 1;
553           to_next += 1;
554           n_left_to_next -= 1;
555           n_left_from -= 1;
556
557           if (PREDICT_FALSE (next0 != next))
558             {
559               n_left_to_next += 1;
560               vlib_put_next_frame (vm, node, next, n_left_to_next);
561               next = next0;
562               vlib_get_next_frame (vm, node, next,
563                                    to_next, n_left_to_next);
564               to_next[0] = pi0;
565               to_next += 1;
566               n_left_to_next -= 1;
567             }
568         }
569
570       vlib_put_next_frame (vm, node, next, n_left_to_next);
571     }
572
573   return frame->n_vectors;
574 }
575
/* Trace formatter used by the node below; only declared here, the
   definition is not in this part of the file. */
static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);

/*
 * Registration of the "ip4-load-balance" graph node: dispatch function
 * ip4_load_balance, frames of u32 elements. No next-node table is given
 * here; the node is declared a sibling of "ip4-lookup".
 * NOTE(review): presumably a sibling shares ip4-lookup's next-node
 * arcs -- confirm against the vlib node documentation.
 */
VLIB_REGISTER_NODE (ip4_load_balance_node) = {
  .function = ip4_load_balance,
  .name = "ip4-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",

  .format_trace = format_ip4_forward_next_trace,
};

/* NOTE(review): presumably emits per-CPU-architecture variants of
   ip4_load_balance for this node -- confirm against the macro definition. */
VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
588
589 /* get first interface address */
590 ip4_address_t *
591 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
592                              ip_interface_address_t ** result_ia)
593 {
594   ip_lookup_main_t * lm = &im->lookup_main;
595   ip_interface_address_t * ia = 0;
596   ip4_address_t * result = 0;
597
598   foreach_ip_interface_address (lm, ia, sw_if_index,
599                                 1 /* honor unnumbered */,
600   ({
601     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
602     result = a;
603     break;
604   }));
605   if (result_ia)
606     *result_ia = result ? ia : 0;
607   return result;
608 }
609
610 static void
611 ip4_add_interface_routes (u32 sw_if_index,
612                           ip4_main_t * im, u32 fib_index,
613                           ip_interface_address_t * a)
614 {
615   ip_lookup_main_t * lm = &im->lookup_main;
616   ip4_address_t * address = ip_interface_address_get_address (lm, a);
617   fib_prefix_t pfx = {
618       .fp_len = a->address_length,
619       .fp_proto = FIB_PROTOCOL_IP4,
620       .fp_addr.ip4 = *address,
621   };
622
623   a->neighbor_probe_adj_index = ~0;
624
625   if (pfx.fp_len < 32)
626   {
627       fib_node_index_t fei;
628
629       fei = fib_table_entry_update_one_path(fib_index,
630                                             &pfx,
631                                             FIB_SOURCE_INTERFACE,
632                                             (FIB_ENTRY_FLAG_CONNECTED |
633                                              FIB_ENTRY_FLAG_ATTACHED),
634                                             FIB_PROTOCOL_IP4,
635                                             NULL, /* No next-hop address */
636                                             sw_if_index,
637                                             ~0, // invalid FIB index
638                                             1,
639                                             MPLS_LABEL_INVALID,
640                                             FIB_ROUTE_PATH_FLAG_NONE);
641       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
642   }
643
644   pfx.fp_len = 32;
645
646   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
647   {
648       u32 classify_table_index =
649           lm->classify_table_index_by_sw_if_index [sw_if_index];
650       if (classify_table_index != (u32) ~0)
651       {
652           dpo_id_t dpo = DPO_INVALID;
653
654           dpo_set(&dpo,
655                   DPO_CLASSIFY,
656                   DPO_PROTO_IP4,
657                   classify_dpo_create(DPO_PROTO_IP4,
658                                       classify_table_index));
659
660           fib_table_entry_special_dpo_add(fib_index,
661                                           &pfx,
662                                           FIB_SOURCE_CLASSIFY,
663                                           FIB_ENTRY_FLAG_NONE,
664                                           &dpo);
665           dpo_reset(&dpo);
666       }
667   }
668
669   fib_table_entry_update_one_path(fib_index,
670                                   &pfx,
671                                   FIB_SOURCE_INTERFACE,
672                                   (FIB_ENTRY_FLAG_CONNECTED |
673                                    FIB_ENTRY_FLAG_LOCAL),
674                                   FIB_PROTOCOL_IP4,
675                                   &pfx.fp_addr,
676                                   sw_if_index,
677                                   ~0, // invalid FIB index
678                                   1,
679                                   MPLS_LABEL_INVALID,
680                                   FIB_ROUTE_PATH_FLAG_NONE);
681 }
682
683 static void
684 ip4_del_interface_routes (ip4_main_t * im,
685                           u32 fib_index,
686                           ip4_address_t * address,
687                           u32 address_length)
688 {
689     fib_prefix_t pfx = {
690         .fp_len = address_length,
691         .fp_proto = FIB_PROTOCOL_IP4,
692         .fp_addr.ip4 = *address,
693     };
694
695     if (pfx.fp_len < 32)
696     {
697         fib_table_entry_delete(fib_index,
698                                &pfx,
699                                FIB_SOURCE_INTERFACE);
700     }
701
702     pfx.fp_len = 32;
703     fib_table_entry_delete(fib_index,
704                            &pfx,
705                            FIB_SOURCE_INTERFACE);
706 }
707
708 void
709 ip4_sw_interface_enable_disable (u32 sw_if_index,
710                                  u32 is_enable)
711 {
712   ip4_main_t * im = &ip4_main;
713
714   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
715
716   /*
717    * enable/disable only on the 1<->0 transition
718    */
719   if (is_enable)
720     {
721       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
722         return;
723     }
724   else
725     {
726       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
727       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
728         return;
729     }
730   vnet_feature_enable_disable ("ip4-unicast", "ip4-lookup", sw_if_index,
731                                is_enable, 0, 0);
732
733   vnet_feature_enable_disable ("ip4-multicast", "ip4-lookup-multicast", sw_if_index,
734                                is_enable, 0, 0);
735
736 }
737
738 static clib_error_t *
739 ip4_add_del_interface_address_internal (vlib_main_t * vm,
740                                         u32 sw_if_index,
741                                         ip4_address_t * address,
742                                         u32 address_length,
743                                         u32 is_del)
744 {
745   vnet_main_t * vnm = vnet_get_main();
746   ip4_main_t * im = &ip4_main;
747   ip_lookup_main_t * lm = &im->lookup_main;
748   clib_error_t * error = 0;
749   u32 if_address_index, elts_before;
750   ip4_address_fib_t ip4_af, * addr_fib = 0;
751
752   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
753   ip4_addr_fib_init (&ip4_af, address,
754                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
755   vec_add1 (addr_fib, ip4_af);
756
757   /* FIXME-LATER
758    * there is no support for adj-fib handling in the presence of overlapping
759    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
760    * most routers do.
761    */
762   if (! is_del)
763     {
764       /* When adding an address check that it does not conflict
765          with an existing address. */
766       ip_interface_address_t * ia;
767       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
768                                     0 /* honor unnumbered */,
769       ({
770         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
771
772         if (ip4_destination_matches_route (im, address, x, ia->address_length)
773             || ip4_destination_matches_route (im, x, address, address_length))
774           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
775                                     format_ip4_address_and_length, address, address_length,
776                                     format_ip4_address_and_length, x, ia->address_length,
777                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
778        }));
779     }
780
781   elts_before = pool_elts (lm->if_address_pool);
782
783   error = ip_interface_address_add_del
784     (lm,
785      sw_if_index,
786      addr_fib,
787      address_length,
788      is_del,
789      &if_address_index);
790   if (error)
791     goto done;
792
793   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
794
795   if (is_del)
796       ip4_del_interface_routes (im, ip4_af.fib_index, address,
797                                 address_length);
798   else
799       ip4_add_interface_routes (sw_if_index,
800                                 im, ip4_af.fib_index,
801                                 pool_elt_at_index
802                                 (lm->if_address_pool, if_address_index));
803
804   /* If pool did not grow/shrink: add duplicate address. */
805   if (elts_before != pool_elts (lm->if_address_pool))
806     {
807       ip4_add_del_interface_address_callback_t * cb;
808       vec_foreach (cb, im->add_del_interface_address_callbacks)
809         cb->function (im, cb->function_opaque, sw_if_index,
810                       address, address_length,
811                       if_address_index,
812                       is_del);
813     }
814
815  done:
816   vec_free (addr_fib);
817   return error;
818 }
819
820 clib_error_t *
821 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
822                                ip4_address_t * address, u32 address_length,
823                                u32 is_del)
824 {
825   return ip4_add_del_interface_address_internal
826     (vm, sw_if_index, address, address_length,
827      is_del);
828 }
829
830 /* Built-in ip4 unicast rx feature path definition */
831 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
832 {
833   .arc_name  = "ip4-unicast",
834   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
835   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
836 };
837
838 VNET_FEATURE_INIT (ip4_flow_classify, static) = {
839   .arc_name = "ip4-unicast",
840   .node_name = "ip4-flow-classify",
841   .runs_before = VNET_FEATURES ("ip4-inacl"),
842 };
843
844 VNET_FEATURE_INIT (ip4_inacl, static) = {
845   .arc_name = "ip4-unicast",
846   .node_name = "ip4-inacl",
847   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
848 };
849
850 VNET_FEATURE_INIT (ip4_source_check_1, static) = {
851   .arc_name = "ip4-unicast",
852   .node_name = "ip4-source-check-via-rx",
853   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
854 };
855
856 VNET_FEATURE_INIT (ip4_source_check_2, static) = {
857   .arc_name = "ip4-unicast",
858   .node_name = "ip4-source-check-via-any",
859   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
860 };
861
862 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
863   .arc_name = "ip4-unicast",
864   .node_name = "ip4-source-and-port-range-check-rx",
865   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
866 };
867
868 VNET_FEATURE_INIT (ip4_policer_classify, static) = {
869   .arc_name = "ip4-unicast",
870   .node_name = "ip4-policer-classify",
871   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
872 };
873
874 VNET_FEATURE_INIT (ip4_ipsec, static) = {
875   .arc_name = "ip4-unicast",
876   .node_name = "ipsec-input-ip4",
877   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
878 };
879
880 VNET_FEATURE_INIT (ip4_vpath, static) = {
881   .arc_name = "ip4-unicast",
882   .node_name = "vpath-input-ip4",
883   .runs_before = VNET_FEATURES ("ip4-lookup"),
884 };
885
886 VNET_FEATURE_INIT (ip4_lookup, static) = {
887   .arc_name = "ip4-unicast",
888   .node_name = "ip4-lookup",
889   .runs_before = VNET_FEATURES ("ip4-drop"),
890 };
891
892 VNET_FEATURE_INIT (ip4_drop, static) = {
893   .arc_name = "ip4-unicast",
894   .node_name = "ip4-drop",
895   .runs_before = 0, /* not before any other features */
896 };
897
898
899 /* Built-in ip4 multicast rx feature path definition */
900 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
901 {
902   .arc_name  = "ip4-multicast",
903   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
904   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
905 };
906
907 VNET_FEATURE_INIT (ip4_vpath_mc, static) = {
908   .arc_name = "ip4-multicast",
909   .node_name = "vpath-input-ip4",
910   .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
911 };
912
913 VNET_FEATURE_INIT (ip4_lookup_mc, static) = {
914   .arc_name = "ip4-multicast",
915   .node_name = "ip4-lookup-multicast",
916   .runs_before = VNET_FEATURES ("ip4-drop"),
917 };
918
919 VNET_FEATURE_INIT (ip4_mc_drop, static) = {
920   .arc_name = "ip4-multicast",
921   .node_name = "ip4-drop",
922   .runs_before = 0, /* last feature */
923 };
924
925 /* Source and port-range check ip4 tx feature path definition */
926 VNET_FEATURE_ARC_INIT (ip4_output, static) =
927 {
928   .arc_name  = "ip4-output",
929   .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"),
930   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
931 };
932
933 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
934   .arc_name = "ip4-output",
935   .node_name = "ip4-source-and-port-range-check-tx",
936   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
937 };
938
939 VNET_FEATURE_INIT (ip4_ipsec_output, static) = {
940   .arc_name = "ip4-output",
941   .node_name = "ipsec-output-ip4",
942   .runs_before = VNET_FEATURES ("interface-output"),
943 };
944
945 /* Built-in ip4 tx feature path definition */
946 VNET_FEATURE_INIT (ip4_interface_output, static) = {
947   .arc_name = "ip4-output",
948   .node_name = "interface-output",
949   .runs_before = 0, /* not before any other features */
950 };
951
952
953 static clib_error_t *
954 ip4_sw_interface_add_del (vnet_main_t * vnm,
955                           u32 sw_if_index,
956                           u32 is_add)
957 {
958   ip4_main_t * im = &ip4_main;
959
960   /* Fill in lookup tables with default table (0). */
961   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
962
963   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
964                                is_add, 0, 0);
965
966   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
967                                is_add, 0, 0);
968
969   vnet_feature_enable_disable ("ip4-output", "interface-output", sw_if_index,
970                                is_add, 0, 0);
971
972   return /* no error */ 0;
973 }
974
975 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
976
977 /* Global IP4 main. */
978 ip4_main_t ip4_main;
979
980 clib_error_t *
981 ip4_lookup_init (vlib_main_t * vm)
982 {
983   ip4_main_t * im = &ip4_main;
984   clib_error_t * error;
985   uword i;
986
987   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
988     return error;
989
990   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
991     {
992       u32 m;
993
994       if (i < 32)
995         m = pow2_mask (i) << (32 - i);
996       else
997         m = ~0;
998       im->fib_masks[i] = clib_host_to_net_u32 (m);
999     }
1000
1001   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1002
1003   /* Create FIB with index 0 and table id of 0. */
1004   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1005
1006   {
1007     pg_node_t * pn;
1008     pn = pg_get_node (ip4_lookup_node.index);
1009     pn->unformat_edit = unformat_pg_ip4_header;
1010   }
1011
1012   {
1013     ethernet_arp_header_t h;
1014
1015     memset (&h, 0, sizeof (h));
1016
1017     /* Set target ethernet address to all zeros. */
1018     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1019
1020 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1021 #define _8(f,v) h.f = v;
1022     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1023     _16 (l3_type, ETHERNET_TYPE_IP4);
1024     _8 (n_l2_address_bytes, 6);
1025     _8 (n_l3_address_bytes, 4);
1026     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1027 #undef _16
1028 #undef _8
1029
1030     vlib_packet_template_init (vm,
1031                                &im->ip4_arp_request_packet_template,
1032                                /* data */ &h,
1033                                sizeof (h),
1034                                /* alloc chunk size */ 8,
1035                                "ip4 arp");
1036   }
1037
1038   return error;
1039 }
1040
1041 VLIB_INIT_FUNCTION (ip4_lookup_init);
1042
1043 typedef struct {
1044   /* Adjacency taken. */
1045   u32 dpo_index;
1046   u32 flow_hash;
1047   u32 fib_index;
1048
1049   /* Packet data, possibly *after* rewrite. */
1050   u8 packet_data[64 - 1*sizeof(u32)];
1051 } ip4_forward_next_trace_t;
1052
1053 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1054 {
1055   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1056   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1057   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1058   uword indent = format_get_indent (s);
1059   s = format (s, "%U%U",
1060               format_white_space, indent,
1061               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1062   return s;
1063 }
1064
1065 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1066 {
1067   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1068   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1069   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1070   uword indent = format_get_indent (s);
1071
1072   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1073               t->fib_index, t->dpo_index, t->flow_hash);
1074   s = format (s, "\n%U%U",
1075               format_white_space, indent,
1076               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1077   return s;
1078 }
1079
1080 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1081 {
1082   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1083   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1084   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1085   vnet_main_t * vnm = vnet_get_main();
1086   uword indent = format_get_indent (s);
1087
1088   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1089               t->fib_index, t->dpo_index, format_ip_adjacency,
1090               t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1091               t->flow_hash);
1092   s = format (s, "\n%U%U",
1093               format_white_space, indent,
1094               format_ip_adjacency_packet_data,
1095               vnm, t->dpo_index,
1096               t->packet_data, sizeof (t->packet_data));
1097   return s;
1098 }
1099
1100 /* Common trace function for all ip4-forward next nodes. */
1101 void
1102 ip4_forward_next_trace (vlib_main_t * vm,
1103                         vlib_node_runtime_t * node,
1104                         vlib_frame_t * frame,
1105                         vlib_rx_or_tx_t which_adj_index)
1106 {
1107   u32 * from, n_left;
1108   ip4_main_t * im = &ip4_main;
1109
1110   n_left = frame->n_vectors;
1111   from = vlib_frame_vector_args (frame);
1112
1113   while (n_left >= 4)
1114     {
1115       u32 bi0, bi1;
1116       vlib_buffer_t * b0, * b1;
1117       ip4_forward_next_trace_t * t0, * t1;
1118
1119       /* Prefetch next iteration. */
1120       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1121       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1122
1123       bi0 = from[0];
1124       bi1 = from[1];
1125
1126       b0 = vlib_get_buffer (vm, bi0);
1127       b1 = vlib_get_buffer (vm, bi1);
1128
1129       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1130         {
1131           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1132           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1133           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1134           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1135               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1136               vec_elt (im->fib_index_by_sw_if_index,
1137                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1138
1139           clib_memcpy (t0->packet_data,
1140                   vlib_buffer_get_current (b0),
1141                   sizeof (t0->packet_data));
1142         }
1143       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1144         {
1145           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1146           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1147           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1148           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1149               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1150               vec_elt (im->fib_index_by_sw_if_index,
1151                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1152           clib_memcpy (t1->packet_data,
1153                   vlib_buffer_get_current (b1),
1154                   sizeof (t1->packet_data));
1155         }
1156       from += 2;
1157       n_left -= 2;
1158     }
1159
1160   while (n_left >= 1)
1161     {
1162       u32 bi0;
1163       vlib_buffer_t * b0;
1164       ip4_forward_next_trace_t * t0;
1165
1166       bi0 = from[0];
1167
1168       b0 = vlib_get_buffer (vm, bi0);
1169
1170       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1171         {
1172           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1173           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1174           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1175           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1176               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1177               vec_elt (im->fib_index_by_sw_if_index,
1178                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1179           clib_memcpy (t0->packet_data,
1180                   vlib_buffer_get_current (b0),
1181                   sizeof (t0->packet_data));
1182         }
1183       from += 1;
1184       n_left -= 1;
1185     }
1186 }
1187
1188 static uword
1189 ip4_drop_or_punt (vlib_main_t * vm,
1190                   vlib_node_runtime_t * node,
1191                   vlib_frame_t * frame,
1192                   ip4_error_t error_code)
1193 {
1194   u32 * buffers = vlib_frame_vector_args (frame);
1195   uword n_packets = frame->n_vectors;
1196
1197   vlib_error_drop_buffers (vm, node,
1198                            buffers,
1199                            /* stride */ 1,
1200                            n_packets,
1201                            /* next */ 0,
1202                            ip4_input_node.index,
1203                            error_code);
1204
1205   if (node->flags & VLIB_NODE_FLAG_TRACE)
1206     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1207
1208   return n_packets;
1209 }
1210
1211 static uword
1212 ip4_drop (vlib_main_t * vm,
1213           vlib_node_runtime_t * node,
1214           vlib_frame_t * frame)
1215 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1216
1217 static uword
1218 ip4_punt (vlib_main_t * vm,
1219           vlib_node_runtime_t * node,
1220           vlib_frame_t * frame)
1221 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1222
1223 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1224   .function = ip4_drop,
1225   .name = "ip4-drop",
1226   .vector_size = sizeof (u32),
1227
1228   .format_trace = format_ip4_forward_next_trace,
1229
1230   .n_next_nodes = 1,
1231   .next_nodes = {
1232     [0] = "error-drop",
1233   },
1234 };
1235
1236 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1237
1238 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1239   .function = ip4_punt,
1240   .name = "ip4-punt",
1241   .vector_size = sizeof (u32),
1242
1243   .format_trace = format_ip4_forward_next_trace,
1244
1245   .n_next_nodes = 1,
1246   .next_nodes = {
1247     [0] = "error-punt",
1248   },
1249 };
1250
1251 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1252
1253 /* Compute TCP/UDP/ICMP4 checksum in software. */
1254 u16
1255 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1256                               ip4_header_t * ip0)
1257 {
1258   ip_csum_t sum0;
1259   u32 ip_header_length, payload_length_host_byte_order;
1260   u32 n_this_buffer, n_bytes_left;
1261   u16 sum16;
1262   void * data_this_buffer;
1263
1264   /* Initialize checksum with ip header. */
1265   ip_header_length = ip4_header_bytes (ip0);
1266   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1267   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1268
1269   if (BITS (uword) == 32)
1270     {
1271       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1272       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1273     }
1274   else
1275     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1276
1277   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1278   data_this_buffer = (void *) ip0 + ip_header_length;
1279   if (n_this_buffer + ip_header_length > p0->current_length)
1280     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1281   while (1)
1282     {
1283       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1284       n_bytes_left -= n_this_buffer;
1285       if (n_bytes_left == 0)
1286         break;
1287
1288       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1289       p0 = vlib_get_buffer (vm, p0->next_buffer);
1290       data_this_buffer = vlib_buffer_get_current (p0);
1291       n_this_buffer = p0->current_length;
1292     }
1293
1294   sum16 = ~ ip_csum_fold (sum0);
1295
1296   return sum16;
1297 }
1298
1299 static u32
1300 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1301 {
1302   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1303   udp_header_t * udp0;
1304   u16 sum16;
1305
1306   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1307           || ip0->protocol == IP_PROTOCOL_UDP);
1308
1309   udp0 = (void *) (ip0 + 1);
1310   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1311     {
1312       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1313                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1314       return p0->flags;
1315     }
1316
1317   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1318
1319   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1320                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1321
1322   return p0->flags;
1323 }
1324
1325 static uword
1326 ip4_local (vlib_main_t * vm,
1327            vlib_node_runtime_t * node,
1328            vlib_frame_t * frame)
1329 {
1330   ip4_main_t * im = &ip4_main;
1331   ip_lookup_main_t * lm = &im->lookup_main;
1332   ip_local_next_t next_index;
1333   u32 * from, * to_next, n_left_from, n_left_to_next;
1334   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1335
1336   from = vlib_frame_vector_args (frame);
1337   n_left_from = frame->n_vectors;
1338   next_index = node->cached_next_index;
1339
1340   if (node->flags & VLIB_NODE_FLAG_TRACE)
1341     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1342
1343   while (n_left_from > 0)
1344     {
1345       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1346
1347       while (n_left_from >= 4 && n_left_to_next >= 2)
1348         {
1349           vlib_buffer_t * p0, * p1;
1350           ip4_header_t * ip0, * ip1;
1351           udp_header_t * udp0, * udp1;
1352           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1353           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1354           const dpo_id_t *dpo0, *dpo1;
1355           const load_balance_t *lb0, *lb1;
1356           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1357           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1358           i32 len_diff0, len_diff1;
1359           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1360           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1361           u8 enqueue_code;
1362
1363           pi0 = to_next[0] = from[0];
1364           pi1 = to_next[1] = from[1];
1365           from += 2;
1366           n_left_from -= 2;
1367           to_next += 2;
1368           n_left_to_next -= 2;
1369
1370           p0 = vlib_get_buffer (vm, pi0);
1371           p1 = vlib_get_buffer (vm, pi1);
1372
1373           ip0 = vlib_buffer_get_current (p0);
1374           ip1 = vlib_buffer_get_current (p1);
1375
1376           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1377                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1378           fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1379                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1380
1381           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1382           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1383
1384           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1385
1386           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1387           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1388
1389           /* Treat IP frag packets as "experimental" protocol for now
1390              until support of IP frag reassembly is implemented */
1391           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1392           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1393           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1394           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1395           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1396           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1397
1398           flags0 = p0->flags;
1399           flags1 = p1->flags;
1400
1401           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1402           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1403
1404           udp0 = ip4_next_header (ip0);
1405           udp1 = ip4_next_header (ip1);
1406
1407           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1408           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1409           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1410
1411           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1412           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1413
1414           /* Verify UDP length. */
1415           ip_len0 = clib_net_to_host_u16 (ip0->length);
1416           ip_len1 = clib_net_to_host_u16 (ip1->length);
1417           udp_len0 = clib_net_to_host_u16 (udp0->length);
1418           udp_len1 = clib_net_to_host_u16 (udp1->length);
1419
1420           len_diff0 = ip_len0 - udp_len0;
1421           len_diff1 = ip_len1 - udp_len1;
1422
1423           len_diff0 = is_udp0 ? len_diff0 : 0;
1424           len_diff1 = is_udp1 ? len_diff1 : 0;
1425
1426           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1427                                 & good_tcp_udp0 & good_tcp_udp1)))
1428             {
1429               if (is_tcp_udp0)
1430                 {
1431                   if (is_tcp_udp0
1432                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1433                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1434                   good_tcp_udp0 =
1435                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1436                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1437                 }
1438               if (is_tcp_udp1)
1439                 {
1440                   if (is_tcp_udp1
1441                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1442                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1443                   good_tcp_udp1 =
1444                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1445                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1446                 }
1447             }
1448
1449           good_tcp_udp0 &= len_diff0 >= 0;
1450           good_tcp_udp1 &= len_diff1 >= 0;
1451
1452           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1453           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1454
1455           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1456
1457           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1458           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1459
1460           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1461           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1462                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1463                     : error0);
1464           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1465                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1466                     : error1);
1467
1468           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1469           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1470           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1471           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1472
1473           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1474           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1475
1476           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1477           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1478
1479           lb0 = load_balance_get(lbi0);
1480           lb1 = load_balance_get(lbi1);
1481           dpo0 = load_balance_get_bucket_i(lb0, 0);
1482           dpo1 = load_balance_get_bucket_i(lb1, 0);
1483
1484           /*
1485            * Must have a route to source otherwise we drop the packet.
1486            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1487            *
1488            * The checks are:
1489            *  - the source is a recieve => it's from us => bogus, do this
1490            *    first since it sets a different error code.
1491            *  - uRPF check for any route to source - accept if passes.
1492            *  - allow packets destined to the broadcast address from unknown sources
1493            */
1494           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1495                      dpo0->dpoi_type == DPO_RECEIVE) ?
1496                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1497                     error0);
1498           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1499                      !fib_urpf_check_size(lb0->lb_urpf) &&
1500                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1501                     ? IP4_ERROR_SRC_LOOKUP_MISS
1502                     : error0);
1503           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1504                      dpo1->dpoi_type == DPO_RECEIVE) ?
1505                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1506                     error1);
1507           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1508                      !fib_urpf_check_size(lb1->lb_urpf) &&
1509                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1510                     ? IP4_ERROR_SRC_LOOKUP_MISS
1511                     : error1);
1512
1513           next0 = lm->local_next_by_ip_protocol[proto0];
1514           next1 = lm->local_next_by_ip_protocol[proto1];
1515
1516           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1517           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1518
1519           p0->error = error0 ? error_node->errors[error0] : 0;
1520           p1->error = error1 ? error_node->errors[error1] : 0;
1521
1522           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1523
1524           if (PREDICT_FALSE (enqueue_code != 0))
1525             {
1526               switch (enqueue_code)
1527                 {
1528                 case 1:
1529                   /* A B A */
1530                   to_next[-2] = pi1;
1531                   to_next -= 1;
1532                   n_left_to_next += 1;
1533                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1534                   break;
1535
1536                 case 2:
1537                   /* A A B */
1538                   to_next -= 1;
1539                   n_left_to_next += 1;
1540                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1541                   break;
1542
1543                 case 3:
1544                   /* A B B or A B C */
1545                   to_next -= 2;
1546                   n_left_to_next += 2;
1547                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1548                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1549                   if (next0 == next1)
1550                     {
1551                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1552                       next_index = next1;
1553                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1554                     }
1555                   break;
1556                 }
1557             }
1558         }
1559
1560       while (n_left_from > 0 && n_left_to_next > 0)
1561         {
1562           vlib_buffer_t * p0;
1563           ip4_header_t * ip0;
1564           udp_header_t * udp0;
1565           ip4_fib_mtrie_t * mtrie0;
1566           ip4_fib_mtrie_leaf_t leaf0;
1567           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1568           i32 len_diff0;
1569           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1570           load_balance_t *lb0;
1571           const dpo_id_t *dpo0;
1572
1573           pi0 = to_next[0] = from[0];
1574           from += 1;
1575           n_left_from -= 1;
1576           to_next += 1;
1577           n_left_to_next -= 1;
1578
1579           p0 = vlib_get_buffer (vm, pi0);
1580
1581           ip0 = vlib_buffer_get_current (p0);
1582
1583           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1584                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1585
1586           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1587
1588           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1589
1590           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1591
1592           /* Treat IP frag packets as "experimental" protocol for now
1593              until support of IP frag reassembly is implemented */
1594           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1595           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1596           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1597
1598           flags0 = p0->flags;
1599
1600           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1601
1602           udp0 = ip4_next_header (ip0);
1603
1604           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1605           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1606
1607           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1608
1609           /* Verify UDP length. */
1610           ip_len0 = clib_net_to_host_u16 (ip0->length);
1611           udp_len0 = clib_net_to_host_u16 (udp0->length);
1612
1613           len_diff0 = ip_len0 - udp_len0;
1614
1615           len_diff0 = is_udp0 ? len_diff0 : 0;
1616
1617           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1618             {
1619               if (is_tcp_udp0)
1620                 {
1621                   if (is_tcp_udp0
1622                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1623                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1624                   good_tcp_udp0 =
1625                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1626                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1627                 }
1628             }
1629
1630           good_tcp_udp0 &= len_diff0 >= 0;
1631
1632           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1633
1634           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1635
1636           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1637
1638           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1639           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1640                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1641                     : error0);
1642
1643           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1644           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1645
1646           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1647           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1648
1649           lb0 = load_balance_get(lbi0);
1650           dpo0 = load_balance_get_bucket_i(lb0, 0);
1651
1652           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1653               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1654                   dpo0->dpoi_index;
1655
1656           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1657                      dpo0->dpoi_type == DPO_RECEIVE) ?
1658                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1659                     error0);
1660           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661                      !fib_urpf_check_size(lb0->lb_urpf) &&
1662                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1663                     ? IP4_ERROR_SRC_LOOKUP_MISS
1664                     : error0);
1665
1666           next0 = lm->local_next_by_ip_protocol[proto0];
1667
1668           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1669
1670           p0->error = error0? error_node->errors[error0] : 0;
1671
1672           if (PREDICT_FALSE (next0 != next_index))
1673             {
1674               n_left_to_next += 1;
1675               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1676
1677               next_index = next0;
1678               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1679               to_next[0] = pi0;
1680               to_next += 1;
1681               n_left_to_next -= 1;
1682             }
1683         }
1684
1685       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1686     }
1687
1688   return frame->n_vectors;
1689 }
1690
1691 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1692   .function = ip4_local,
1693   .name = "ip4-local",
1694   .vector_size = sizeof (u32),
1695
1696   .format_trace = format_ip4_forward_next_trace,
1697
1698   .n_next_nodes = IP_LOCAL_N_NEXT,
1699   .next_nodes = {
1700     [IP_LOCAL_NEXT_DROP] = "error-drop",
1701     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1702     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1703     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1704   },
1705 };
1706
1707 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1708
1709 void ip4_register_protocol (u32 protocol, u32 node_index)
1710 {
1711   vlib_main_t * vm = vlib_get_main();
1712   ip4_main_t * im = &ip4_main;
1713   ip_lookup_main_t * lm = &im->lookup_main;
1714
1715   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1716   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1717 }
1718
1719 static clib_error_t *
1720 show_ip_local_command_fn (vlib_main_t * vm,
1721                           unformat_input_t * input,
1722                          vlib_cli_command_t * cmd)
1723 {
1724   ip4_main_t * im = &ip4_main;
1725   ip_lookup_main_t * lm = &im->lookup_main;
1726   int i;
1727
1728   vlib_cli_output (vm, "Protocols handled by ip4_local");
1729   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1730     {
1731       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1732         vlib_cli_output (vm, "%d", i);
1733     }
1734   return 0;
1735 }
1736
1737
1738
1739 /*?
1740  * Display the set of protocols handled by the local IPv4 stack.
1741  *
1742  * @cliexpar
1743  * Example of how to display local protocol table:
1744  * @cliexstart{show ip local}
1745  * Protocols handled by ip4_local
1746  * 1
1747  * 17
1748  * 47
1749  * @cliexend
1750 ?*/
1751 /* *INDENT-OFF* */
1752 VLIB_CLI_COMMAND (show_ip_local, static) = {
1753   .path = "show ip local",
1754   .function = show_ip_local_command_fn,
1755   .short_help = "show ip local",
1756 };
1757 /* *INDENT-ON* */
1758
1759 always_inline uword
1760 ip4_arp_inline (vlib_main_t * vm,
1761                 vlib_node_runtime_t * node,
1762                 vlib_frame_t * frame,
1763                 int is_glean)
1764 {
1765   vnet_main_t * vnm = vnet_get_main();
1766   ip4_main_t * im = &ip4_main;
1767   ip_lookup_main_t * lm = &im->lookup_main;
1768   u32 * from, * to_next_drop;
1769   uword n_left_from, n_left_to_next_drop, next_index;
1770   static f64 time_last_seed_change = -1e100;
1771   static u32 hash_seeds[3];
1772   static uword hash_bitmap[256 / BITS (uword)];
1773   f64 time_now;
1774
1775   if (node->flags & VLIB_NODE_FLAG_TRACE)
1776     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1777
1778   time_now = vlib_time_now (vm);
1779   if (time_now - time_last_seed_change > 1e-3)
1780     {
1781       uword i;
1782       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1783                                              sizeof (hash_seeds));
1784       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1785         hash_seeds[i] = r[i];
1786
1787       /* Mark all hash keys as been no-seen before. */
1788       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1789         hash_bitmap[i] = 0;
1790
1791       time_last_seed_change = time_now;
1792     }
1793
1794   from = vlib_frame_vector_args (frame);
1795   n_left_from = frame->n_vectors;
1796   next_index = node->cached_next_index;
1797   if (next_index == IP4_ARP_NEXT_DROP)
1798     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1799
1800   while (n_left_from > 0)
1801     {
1802       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1803                            to_next_drop, n_left_to_next_drop);
1804
1805       while (n_left_from > 0 && n_left_to_next_drop > 0)
1806         {
1807           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1808           ip_adjacency_t * adj0;
1809           vlib_buffer_t * p0;
1810           ip4_header_t * ip0;
1811           uword bm0;
1812
1813           pi0 = from[0];
1814
1815           p0 = vlib_get_buffer (vm, pi0);
1816
1817           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1818           adj0 = ip_get_adjacency (lm, adj_index0);
1819           ip0 = vlib_buffer_get_current (p0);
1820
1821           a0 = hash_seeds[0];
1822           b0 = hash_seeds[1];
1823           c0 = hash_seeds[2];
1824
1825           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1826           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1827
1828           if (is_glean)
1829           {
1830               /*
1831                * this is the Glean case, so we are ARPing for the
1832                * packet's destination
1833                */
1834               a0 ^= ip0->dst_address.data_u32;
1835           }
1836           else
1837           {
1838               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1839           }
1840           b0 ^= sw_if_index0;
1841
1842           hash_v3_finalize32 (a0, b0, c0);
1843
1844           c0 &= BITS (hash_bitmap) - 1;
1845           c0 = c0 / BITS (uword);
1846           m0 = (uword) 1 << (c0 % BITS (uword));
1847
1848           bm0 = hash_bitmap[c0];
1849           drop0 = (bm0 & m0) != 0;
1850
1851           /* Mark it as seen. */
1852           hash_bitmap[c0] = bm0 | m0;
1853
1854           from += 1;
1855           n_left_from -= 1;
1856           to_next_drop[0] = pi0;
1857           to_next_drop += 1;
1858           n_left_to_next_drop -= 1;
1859
1860           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1861
1862           /*
1863            * the adj has been updated to a rewrite but the node the DPO that got
1864            * us here hasn't - yet. no big deal. we'll drop while we wait.
1865            */
1866           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1867             continue;
1868
1869           if (drop0)
1870             continue;
1871
1872           /*
1873            * Can happen if the control-plane is programming tables
1874            * with traffic flowing; at least that's today's lame excuse.
1875            */
1876           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1877               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1878           {
1879             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1880           }
1881           else
1882           /* Send ARP request. */
1883           {
1884             u32 bi0 = 0;
1885             vlib_buffer_t * b0;
1886             ethernet_arp_header_t * h0;
1887             vnet_hw_interface_t * hw_if0;
1888
1889             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1890
1891             /* Add rewrite/encap string for ARP packet. */
1892             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1893
1894             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1895
1896             /* Src ethernet address in ARP header. */
1897             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1898                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1899
1900             if (is_glean)
1901             {
1902                 /* The interface's source address is stashed in the Glean Adj */
1903                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1904
1905                 /* Copy in destination address we are requesting. This is the
1906                 * glean case, so it's the packet's destination.*/
1907                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1908             }
1909             else
1910             {
1911                 /* Src IP address in ARP header. */
1912                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1913                                                &h0->ip4_over_ethernet[0].ip4))
1914                 {
1915                     /* No source address available */
1916                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1917                     vlib_buffer_free(vm, &bi0, 1);
1918                     continue;
1919                 }
1920
1921                 /* Copy in destination address we are requesting from the
1922                    incomplete adj */
1923                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1924                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1925             }
1926
1927             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1928             b0 = vlib_get_buffer (vm, bi0);
1929             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1930
1931             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1932
1933             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1934           }
1935         }
1936
1937       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1938     }
1939
1940   return frame->n_vectors;
1941 }
1942
1943 static uword
1944 ip4_arp (vlib_main_t * vm,
1945          vlib_node_runtime_t * node,
1946          vlib_frame_t * frame)
1947 {
1948     return (ip4_arp_inline(vm, node, frame, 0));
1949 }
1950
1951 static uword
1952 ip4_glean (vlib_main_t * vm,
1953            vlib_node_runtime_t * node,
1954            vlib_frame_t * frame)
1955 {
1956     return (ip4_arp_inline(vm, node, frame, 1));
1957 }
1958
1959 static char * ip4_arp_error_strings[] = {
1960   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1961   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1962   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1963   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1964   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1965   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1966 };
1967
1968 VLIB_REGISTER_NODE (ip4_arp_node) = {
1969   .function = ip4_arp,
1970   .name = "ip4-arp",
1971   .vector_size = sizeof (u32),
1972
1973   .format_trace = format_ip4_forward_next_trace,
1974
1975   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1976   .error_strings = ip4_arp_error_strings,
1977
1978   .n_next_nodes = IP4_ARP_N_NEXT,
1979   .next_nodes = {
1980     [IP4_ARP_NEXT_DROP] = "error-drop",
1981   },
1982 };
1983
1984 VLIB_REGISTER_NODE (ip4_glean_node) = {
1985   .function = ip4_glean,
1986   .name = "ip4-glean",
1987   .vector_size = sizeof (u32),
1988
1989   .format_trace = format_ip4_forward_next_trace,
1990
1991   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1992   .error_strings = ip4_arp_error_strings,
1993
1994   .n_next_nodes = IP4_ARP_N_NEXT,
1995   .next_nodes = {
1996     [IP4_ARP_NEXT_DROP] = "error-drop",
1997   },
1998 };
1999
2000 #define foreach_notrace_ip4_arp_error           \
2001 _(DROP)                                         \
2002 _(REQUEST_SENT)                                 \
2003 _(REPLICATE_DROP)                               \
2004 _(REPLICATE_FAIL)
2005
2006 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2007 {
2008   vlib_node_runtime_t *rt =
2009     vlib_node_get_runtime (vm, ip4_arp_node.index);
2010
2011   /* don't trace ARP request packets */
2012 #define _(a)                                    \
2013     vnet_pcap_drop_trace_filter_add_del         \
2014         (rt->errors[IP4_ARP_ERROR_##a],         \
2015          1 /* is_add */);
2016     foreach_notrace_ip4_arp_error;
2017 #undef _
2018   return 0;
2019 }
2020
2021 VLIB_INIT_FUNCTION(arp_notrace_init);
2022
2023
2024 /* Send an ARP request to see if given destination is reachable on given interface. */
2025 clib_error_t *
2026 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2027 {
2028   vnet_main_t * vnm = vnet_get_main();
2029   ip4_main_t * im = &ip4_main;
2030   ethernet_arp_header_t * h;
2031   ip4_address_t * src;
2032   ip_interface_address_t * ia;
2033   ip_adjacency_t * adj;
2034   vnet_hw_interface_t * hi;
2035   vnet_sw_interface_t * si;
2036   vlib_buffer_t * b;
2037   u32 bi = 0;
2038
2039   si = vnet_get_sw_interface (vnm, sw_if_index);
2040
2041   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2042     {
2043       return clib_error_return (0, "%U: interface %U down",
2044                                 format_ip4_address, dst,
2045                                 format_vnet_sw_if_index_name, vnm,
2046                                 sw_if_index);
2047     }
2048
2049   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2050   if (! src)
2051     {
2052       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2053       return clib_error_return
2054         (0, "no matching interface address for destination %U (interface %U)",
2055          format_ip4_address, dst,
2056          format_vnet_sw_if_index_name, vnm, sw_if_index);
2057     }
2058
2059   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2060
2061   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2062
2063   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2064
2065   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2066
2067   h->ip4_over_ethernet[0].ip4 = src[0];
2068   h->ip4_over_ethernet[1].ip4 = dst[0];
2069
2070   b = vlib_get_buffer (vm, bi);
2071   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2072
2073   /* Add encapsulation string for software interface (e.g. ethernet header). */
2074   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2075   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2076
2077   {
2078     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2079     u32 * to_next = vlib_frame_vector_args (f);
2080     to_next[0] = bi;
2081     f->n_vectors = 1;
2082     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2083   }
2084
2085   return /* no error */ 0;
2086 }
2087
/* Next indices used by ip4_rewrite_inline() in addition to the next index
   taken from the adjacency's rewrite header. */
typedef enum {
  /* Default next for packets that hit an error. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Override for locally received packets whose adjacency is an ARP adj. */
  IP4_REWRITE_NEXT_ARP = 1,
  /* Chosen when the TTL expires while forwarding. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2093
2094 always_inline uword
2095 ip4_rewrite_inline (vlib_main_t * vm,
2096                     vlib_node_runtime_t * node,
2097                     vlib_frame_t * frame,
2098                     int rewrite_for_locally_received_packets,
2099                     int is_midchain)
2100 {
2101   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2102   u32 * from = vlib_frame_vector_args (frame);
2103   u32 n_left_from, n_left_to_next, * to_next, next_index;
2104   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2105   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2106
2107   n_left_from = frame->n_vectors;
2108   next_index = node->cached_next_index;
2109   u32 cpu_index = os_get_cpu_number();
2110
2111   while (n_left_from > 0)
2112     {
2113       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2114
2115       while (n_left_from >= 4 && n_left_to_next >= 2)
2116         {
2117           ip_adjacency_t * adj0, * adj1;
2118           vlib_buffer_t * p0, * p1;
2119           ip4_header_t * ip0, * ip1;
2120           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2121           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2122           u32 next0_override, next1_override;
2123           u32 tx_sw_if_index0, tx_sw_if_index1;
2124
2125           if (rewrite_for_locally_received_packets)
2126               next0_override = next1_override = 0;
2127
2128           /* Prefetch next iteration. */
2129           {
2130             vlib_buffer_t * p2, * p3;
2131
2132             p2 = vlib_get_buffer (vm, from[2]);
2133             p3 = vlib_get_buffer (vm, from[3]);
2134
2135             vlib_prefetch_buffer_header (p2, STORE);
2136             vlib_prefetch_buffer_header (p3, STORE);
2137
2138             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2139             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2140           }
2141
2142           pi0 = to_next[0] = from[0];
2143           pi1 = to_next[1] = from[1];
2144
2145           from += 2;
2146           n_left_from -= 2;
2147           to_next += 2;
2148           n_left_to_next -= 2;
2149
2150           p0 = vlib_get_buffer (vm, pi0);
2151           p1 = vlib_get_buffer (vm, pi1);
2152
2153           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2154           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2155
2156           /* We should never rewrite a pkt using the MISS adjacency */
2157           ASSERT(adj_index0 && adj_index1);
2158
2159           ip0 = vlib_buffer_get_current (p0);
2160           ip1 = vlib_buffer_get_current (p1);
2161
2162           error0 = error1 = IP4_ERROR_NONE;
2163           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2164
2165           /* Decrement TTL & update checksum.
2166              Works either endian, so no need for byte swap. */
2167           if (! rewrite_for_locally_received_packets)
2168             {
2169               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2170
2171               /* Input node should have reject packets with ttl 0. */
2172               ASSERT (ip0->ttl > 0);
2173               ASSERT (ip1->ttl > 0);
2174
2175               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2176               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2177
2178               checksum0 += checksum0 >= 0xffff;
2179               checksum1 += checksum1 >= 0xffff;
2180
2181               ip0->checksum = checksum0;
2182               ip1->checksum = checksum1;
2183
2184               ttl0 -= 1;
2185               ttl1 -= 1;
2186
2187               ip0->ttl = ttl0;
2188               ip1->ttl = ttl1;
2189
2190               /*
2191                * If the ttl drops below 1 when forwarding, generate
2192                * an ICMP response.
2193                */
2194               if (PREDICT_FALSE(ttl0 <= 0))
2195                 {
2196                   error0 = IP4_ERROR_TIME_EXPIRED;
2197                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2198                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2199                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2200                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2201                 }
2202               if (PREDICT_FALSE(ttl1 <= 0))
2203                 {
2204                   error1 = IP4_ERROR_TIME_EXPIRED;
2205                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2206                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2207                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2208                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2209                 }
2210
2211               /* Verify checksum. */
2212               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2213               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2214             }
2215
2216           /* Rewrite packet header and updates lengths. */
2217           adj0 = ip_get_adjacency (lm, adj_index0);
2218           adj1 = ip_get_adjacency (lm, adj_index1);
2219
2220           if (rewrite_for_locally_received_packets)
2221             {
2222               if (PREDICT_FALSE(adj0->lookup_next_index
2223                                 == IP_LOOKUP_NEXT_ARP))
2224                 next0_override = IP4_REWRITE_NEXT_ARP;
2225               if (PREDICT_FALSE(adj1->lookup_next_index
2226                                 == IP_LOOKUP_NEXT_ARP))
2227                 next1_override = IP4_REWRITE_NEXT_ARP;
2228             }
2229
2230           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2231           rw_len0 = adj0[0].rewrite_header.data_bytes;
2232           rw_len1 = adj1[0].rewrite_header.data_bytes;
2233           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2234           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2235
2236           /* Check MTU of outgoing interface. */
2237           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2238                     ? IP4_ERROR_MTU_EXCEEDED
2239                     : error0);
2240           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2241                     ? IP4_ERROR_MTU_EXCEEDED
2242                     : error1);
2243
2244           next0 = (error0 == IP4_ERROR_NONE)
2245             ? adj0[0].rewrite_header.next_index : next0;
2246
2247           if (rewrite_for_locally_received_packets)
2248               next0 = next0 && next0_override ? next0_override : next0;
2249
2250           next1 = (error1 == IP4_ERROR_NONE)
2251             ? adj1[0].rewrite_header.next_index : next1;
2252
2253           if (rewrite_for_locally_received_packets)
2254               next1 = next1 && next1_override ? next1_override : next1;
2255
2256           /*
2257            * We've already accounted for an ethernet_header_t elsewhere
2258            */
2259           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2260               vlib_increment_combined_counter
2261                   (&adjacency_counters,
2262                    cpu_index, adj_index0,
2263                    /* packet increment */ 0,
2264                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2265
2266           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2267               vlib_increment_combined_counter
2268                   (&adjacency_counters,
2269                    cpu_index, adj_index1,
2270                    /* packet increment */ 0,
2271                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2272
2273           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2274            * to see the IP headerr */
2275           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2276             {
2277               p0->current_data -= rw_len0;
2278               p0->current_length += rw_len0;
2279               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2280               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2281                   tx_sw_if_index0;
2282
2283               vnet_feature_arc_start(lm->output_feature_arc_index,
2284                                      tx_sw_if_index0, &next0, p0);
2285             }
2286           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2287             {
2288               p1->current_data -= rw_len1;
2289               p1->current_length += rw_len1;
2290
2291               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2292               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2293                   tx_sw_if_index1;
2294
2295               vnet_feature_arc_start(lm->output_feature_arc_index,
2296                                      tx_sw_if_index1, &next1, p1);
2297             }
2298
2299           /* Guess we are only writing on simple Ethernet header. */
2300           vnet_rewrite_two_headers (adj0[0], adj1[0],
2301                                     ip0, ip1,
2302                                     sizeof (ethernet_header_t));
2303
2304           if (is_midchain)
2305           {
2306               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2307               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2308           }
2309
2310           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2311                                            to_next, n_left_to_next,
2312                                            pi0, pi1, next0, next1);
2313         }
2314
2315       while (n_left_from > 0 && n_left_to_next > 0)
2316         {
2317           ip_adjacency_t * adj0;
2318           vlib_buffer_t * p0;
2319           ip4_header_t * ip0;
2320           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2321           u32 next0_override;
2322           u32 tx_sw_if_index0;
2323
2324           if (rewrite_for_locally_received_packets)
2325               next0_override = 0;
2326
2327           pi0 = to_next[0] = from[0];
2328
2329           p0 = vlib_get_buffer (vm, pi0);
2330
2331           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2332
2333           /* We should never rewrite a pkt using the MISS adjacency */
2334           ASSERT(adj_index0);
2335
2336           adj0 = ip_get_adjacency (lm, adj_index0);
2337
2338           ip0 = vlib_buffer_get_current (p0);
2339
2340           error0 = IP4_ERROR_NONE;
2341           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2342
2343           /* Decrement TTL & update checksum. */
2344           if (! rewrite_for_locally_received_packets)
2345             {
2346               i32 ttl0 = ip0->ttl;
2347
2348               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2349
2350               checksum0 += checksum0 >= 0xffff;
2351
2352               ip0->checksum = checksum0;
2353
2354               ASSERT (ip0->ttl > 0);
2355
2356               ttl0 -= 1;
2357
2358               ip0->ttl = ttl0;
2359
2360               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2361
2362               if (PREDICT_FALSE(ttl0 <= 0))
2363                 {
2364                   /*
2365                    * If the ttl drops below 1 when forwarding, generate
2366                    * an ICMP response.
2367                    */
2368                   error0 = IP4_ERROR_TIME_EXPIRED;
2369                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2370                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2371                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2372                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2373                 }
2374             }
2375
2376           if (rewrite_for_locally_received_packets)
2377             {
2378               /*
2379                * We have to override the next_index in ARP adjacencies,
2380                * because they're set up for ip4-arp, not this node...
2381                */
2382               if (PREDICT_FALSE(adj0->lookup_next_index
2383                                 == IP_LOOKUP_NEXT_ARP))
2384                 next0_override = IP4_REWRITE_NEXT_ARP;
2385             }
2386
2387           /* Guess we are only writing on simple Ethernet header. */
2388           vnet_rewrite_one_header (adj0[0], ip0,
2389                                    sizeof (ethernet_header_t));
2390
2391           /* Update packet buffer attributes/set output interface. */
2392           rw_len0 = adj0[0].rewrite_header.data_bytes;
2393           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2394
2395           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2396               vlib_increment_combined_counter
2397                   (&adjacency_counters,
2398                    cpu_index, adj_index0,
2399                    /* packet increment */ 0,
2400                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2401
2402           /* Check MTU of outgoing interface. */
2403           error0 = (vlib_buffer_length_in_chain (vm, p0)
2404                     > adj0[0].rewrite_header.max_l3_packet_bytes
2405                     ? IP4_ERROR_MTU_EXCEEDED
2406                     : error0);
2407
2408           p0->error = error_node->errors[error0];
2409
2410           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2411            * to see the IP headerr */
2412           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2413             {
2414               p0->current_data -= rw_len0;
2415               p0->current_length += rw_len0;
2416               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2417
2418               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2419               next0 = adj0[0].rewrite_header.next_index;
2420
2421               if (is_midchain)
2422                 {
2423                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2424                 }
2425
2426               vnet_feature_arc_start(lm->output_feature_arc_index,
2427                                      tx_sw_if_index0, &next0, p0);
2428
2429             }
2430
2431           if (rewrite_for_locally_received_packets)
2432               next0 = next0 && next0_override ? next0_override : next0;
2433
2434           from += 1;
2435           n_left_from -= 1;
2436           to_next += 1;
2437           n_left_to_next -= 1;
2438
2439           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2440                                            to_next, n_left_to_next,
2441                                            pi0, next0);
2442         }
2443
2444       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2445     }
2446
2447   /* Need to do trace after rewrites to pick up new packet data. */
2448   if (node->flags & VLIB_NODE_FLAG_TRACE)
2449     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2450
2451   return frame->n_vectors;
2452 }
2453
2454
2455 /** @brief IPv4 transit rewrite node.
2456     @node ip4-rewrite-transit
2457
2458     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2459     header checksum, fetch the ip adjacency, check the outbound mtu,
2460     apply the adjacency rewrite, and send pkts to the adjacency
2461     rewrite header's rewrite_next_index.
2462
2463     @param vm vlib_main_t corresponding to the current thread
2464     @param node vlib_node_runtime_t
2465     @param frame vlib_frame_t whose contents should be dispatched
2466
2467     @par Graph mechanics: buffer metadata, next index usage
2468
2469     @em Uses:
2470     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2471         - the rewrite adjacency index
2472     - <code>adj->lookup_next_index</code>
2473         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2474           the packet will be dropped.
2475     - <code>adj->rewrite_header</code>
2476         - Rewrite string length, rewrite string, next_index
2477
2478     @em Sets:
2479     - <code>b->current_data, b->current_length</code>
2480         - Updated net of applying the rewrite string
2481
2482     <em>Next Indices:</em>
2483     - <code> adj->rewrite_header.next_index </code>
2484       or @c error-drop
2485 */
2486 static uword
2487 ip4_rewrite_transit (vlib_main_t * vm,
2488                      vlib_node_runtime_t * node,
2489                      vlib_frame_t * frame)
2490 {
2491   return ip4_rewrite_inline (vm, node, frame,
2492                              /* rewrite_for_locally_received_packets */ 0, 0);
2493 }
2494
2495 /** @brief IPv4 local rewrite node.
2496     @node ip4-rewrite-local
2497
2498     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2499     the outbound interface mtu, apply the adjacency rewrite, and send
    pkts to the adjacency rewrite header's rewrite_next_index. Also
    deals with awkward cases of the form "a sender emits an icmp4 w/
    src = dst = interface addr."
2503
2504     @param vm vlib_main_t corresponding to the current thread
2505     @param node vlib_node_runtime_t
2506     @param frame vlib_frame_t whose contents should be dispatched
2507
2508     @par Graph mechanics: buffer metadata, next index usage
2509
2510     @em Uses:
2511     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2512         - the rewrite adjacency index
2513     - <code>adj->lookup_next_index</code>
2514         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2515           the packet will be dropped.
2516     - <code>adj->rewrite_header</code>
2517         - Rewrite string length, rewrite string, next_index
2518
2519     @em Sets:
2520     - <code>b->current_data, b->current_length</code>
2521         - Updated net of applying the rewrite string
2522
2523     <em>Next Indices:</em>
2524     - <code> adj->rewrite_header.next_index </code>
2525       or @c error-drop
2526 */
2527
2528 static uword
2529 ip4_rewrite_local (vlib_main_t * vm,
2530                    vlib_node_runtime_t * node,
2531                    vlib_frame_t * frame)
2532 {
2533   return ip4_rewrite_inline (vm, node, frame,
2534                              /* rewrite_for_locally_received_packets */ 1, 0);
2535 }
2536
2537 static uword
2538 ip4_midchain (vlib_main_t * vm,
2539               vlib_node_runtime_t * node,
2540               vlib_frame_t * frame)
2541 {
2542   return ip4_rewrite_inline (vm, node, frame,
2543                              /* rewrite_for_locally_received_packets */ 0, 1);
2544 }
2545
2546 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2547   .function = ip4_rewrite_transit,
2548   .name = "ip4-rewrite-transit",
2549   .vector_size = sizeof (u32),
2550
2551   .format_trace = format_ip4_rewrite_trace,
2552
2553   .n_next_nodes = 3,
2554   .next_nodes = {
2555     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2556     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2557     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2558   },
2559 };
2560
2561 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2562
2563 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2564   .function = ip4_midchain,
2565   .name = "ip4-midchain",
2566   .vector_size = sizeof (u32),
2567
2568   .format_trace = format_ip4_forward_next_trace,
2569
2570   .sibling_of = "ip4-rewrite-transit",
2571 };
2572
2573 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2574
2575 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2576   .function = ip4_rewrite_local,
2577   .name = "ip4-rewrite-local",
2578   .vector_size = sizeof (u32),
2579
2580   .sibling_of = "ip4-rewrite-transit",
2581
2582   .format_trace = format_ip4_rewrite_trace,
2583
2584   .n_next_nodes = 0,
2585 };
2586
2587 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2588
2589 static clib_error_t *
2590 add_del_interface_table (vlib_main_t * vm,
2591                          unformat_input_t * input,
2592                          vlib_cli_command_t * cmd)
2593 {
2594   vnet_main_t * vnm = vnet_get_main();
2595   clib_error_t * error = 0;
2596   u32 sw_if_index, table_id;
2597
2598   sw_if_index = ~0;
2599
2600   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2601     {
2602       error = clib_error_return (0, "unknown interface `%U'",
2603                                  format_unformat_error, input);
2604       goto done;
2605     }
2606
2607   if (unformat (input, "%d", &table_id))
2608     ;
2609   else
2610     {
2611       error = clib_error_return (0, "expected table id `%U'",
2612                                  format_unformat_error, input);
2613       goto done;
2614     }
2615
2616   {
2617     ip4_main_t * im = &ip4_main;
2618     u32 fib_index;
2619
2620     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2621                                                    table_id);
2622
2623     //
2624     // FIXME-LATER
2625     //  changing an interface's table has consequences for any connecteds
2626     //  and adj-fibs already installed.
2627     //
2628     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2629     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2630   }
2631
2632  done:
2633   return error;
2634 }
2635
2636 /*?
2637  * Place the indicated interface into the supplied IPv4 FIB table (also known
2638  * as a VRF). If the FIB table does not exist, this command creates it. To
2639  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2640  * FIB table will only be displayed if a route has been added to the table, or
2641  * an IP Address is assigned to an interface in the table (which adds a route
2642  * automatically).
2643  *
2644  * @note IP addresses added after setting the interface IP table end up in
2645  * the indicated FIB table. If the IP address is added prior to adding the
2646  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2647  * but potentially counter-intuitive results occur if you provision interface
2648  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2649  * IP table ID provisioned. It might be marginally useful to evade source RPF
2650  * drops to put an interface address into multiple FIBs.
2651  *
2652  * @cliexpar
2653  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2654  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2655  ?*/
2656 /* *INDENT-OFF* */
2657 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2658   .path = "set interface ip table",
2659   .function = add_del_interface_table,
2660   .short_help = "set interface ip table <interface> <table-id>",
2661 };
2662 /* *INDENT-ON* */
2663
2664
/**
 * Node function registered below as "ip4-lookup-multicast".
 *
 * For every buffer in the frame: choose a FIB index (the RX interface's
 * entry in fib_index_by_sw_if_index, unless sw_if_index[VLIB_TX] holds
 * something other than ~0, in which case that value is used as the FIB
 * index), look up the destination address to obtain a load-balance
 * index, compute and store the flow hash, pick the load-balance bucket
 * from the hash, record the bucket's dpoi_index in
 * ip.adj_index[VLIB_TX], bump the load-balance "to" combined counter and
 * enqueue the buffer to the bucket's dpoi_next_node.
 *
 * @param vm    vlib main for the current thread
 * @param node  runtime data of this node
 * @param frame frame holding the buffer indices to process
 * @return number of buffers in the frame (frame->n_vectors)
 */
static uword
ip4_lookup_multicast (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  ip4_main_t * im = &ip4_main;
  vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
  u32 n_left_from, n_left_to_next, * from, * to_next;
  ip_lookup_next_t next;
  u32 cpu_index = os_get_cpu_number();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Start by speculating that packets go where the last ones went. */
  next = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next,
                           to_next, n_left_to_next);

      /* Dual loop: two packets per iteration.  Four must remain because
       * from[2] and from[3] are prefetched for the next iteration. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t * p0, * p1;
          u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
          ip_lookup_next_t next0, next1;
          ip4_header_t * ip0, * ip1;
          u32 fib_index0, fib_index1;
          const dpo_id_t *dpo0, *dpo1;
          const load_balance_t * lb0, * lb1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively place both buffers in the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* FIB index: per-RX-interface default, overridden by
           * sw_if_index[VLIB_TX] when that is not ~0. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
          fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];

          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);
          lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
                                               &ip1->dst_address);

          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);

          /* The bucket selection below masks the hash with
           * lb_n_buckets_minus_1, hence the power-of-two requirement. */
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb1->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
              (ip1, lb1->lb_hash_config);

          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));
          dpo1 = load_balance_get_bucket_i(lb1,
                                           (vnet_buffer (p1)->ip.flow_hash &
                                            (lb1->lb_n_buckets_minus_1)));

          /* The chosen bucket supplies both the next node and the index
           * stored for it in the buffer metadata. */
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          next1 = dpo1->dpoi_next_node;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          /* Count one packet and its chain length against each
           * load-balance index. */
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index0, 1,
               vlib_buffer_length_in_chain (vm, p0));
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index1, 1,
               vlib_buffer_length_in_chain (vm, p1));

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          /* Bit 0 set: p0 was mis-speculated; bit 1 set: p1 was. */
          wrong_next = (next0 != next) + 2*(next1 != next);
          if (PREDICT_FALSE (wrong_next != 0))
            {
              switch (wrong_next)
                {
                case 1:
                  /* A B A */
                  /* Only p0 is wrong: move p1 into p0's slot, give one
                   * slot back and send p0 on its own. */
                  to_next[-2] = pi1;
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  break;

                case 2:
                  /* A A B */
                  /* Only p1 is wrong: give its slot back and send it on
                   * its own. */
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  break;

                case 3:
                  /* A B C */
                  /* Both wrong: give both slots back and send each
                   * buffer on its own. */
                  to_next -= 2;
                  n_left_to_next += 2;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  if (next0 == next1)
                    {
                      /* A B B */
                      /* Both agree on a new next: flush the current
                       * frame and speculate on that next from now on. */
                      vlib_put_next_frame (vm, node, next, n_left_to_next);
                      next = next1;
                      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
                    }
                }
            }
        }

      /* Single loop: same processing, one packet at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t * p0;
          ip4_header_t * ip0;
          u32 pi0, lb_index0;
          ip_lookup_next_t next0;
          u32 fib_index0;
          const dpo_id_t *dpo0;
          const load_balance_t * lb0;

          /* Speculatively place the buffer in the current next frame. */
          pi0 = from[0];
          to_next[0] = pi0;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);

          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
              fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];

          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);

          lb0 = load_balance_get (lb_index0);

          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          if (1) /* $$$$$$ HACK FIXME */
              vlib_increment_combined_counter
                  (cm, cpu_index, lb_index0, 1,
                   vlib_buffer_length_in_chain (vm, p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          if (PREDICT_FALSE (next0 != next))
            {
              /* Mis-speculated: give the slot back, flush the current
               * frame, switch to next0 and enqueue the buffer there. */
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next,
                                   to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  /* Tracing is done once for the whole frame, after all buffers have
   * been processed. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
      ip4_forward_next_trace(vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
2878
2879 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2880   .function = ip4_lookup_multicast,
2881   .name = "ip4-lookup-multicast",
2882   .vector_size = sizeof (u32),
2883   .sibling_of = "ip4-lookup",
2884   .format_trace = format_ip4_lookup_trace,
2885
2886   .n_next_nodes = 0,
2887 };
2888
2889 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2890
2891 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2892   .function = ip4_drop,
2893   .name = "ip4-multicast",
2894   .vector_size = sizeof (u32),
2895
2896   .format_trace = format_ip4_forward_next_trace,
2897
2898   .n_next_nodes = 1,
2899   .next_nodes = {
2900     [0] = "error-drop",
2901   },
2902 };
2903
2904 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2905 {
2906   ip4_fib_mtrie_t * mtrie0;
2907   ip4_fib_mtrie_leaf_t leaf0;
2908   u32 lbi0;
2909
2910   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2911
2912   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2913   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2914   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2915   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2916   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2917
2918   /* Handle default route. */
2919   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2920
2921   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2922
2923   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2924 }
2925
2926 static clib_error_t *
2927 test_lookup_command_fn (vlib_main_t * vm,
2928                         unformat_input_t * input,
2929                         vlib_cli_command_t * cmd)
2930 {
2931   ip4_fib_t *fib;
2932   u32 table_id = 0;
2933   f64 count = 1;
2934   u32 n;
2935   int i;
2936   ip4_address_t ip4_base_address;
2937   u64 errors = 0;
2938
2939   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2940       if (unformat (input, "table %d", &table_id))
2941       {
2942           /* Make sure the entry exists. */
2943           fib = ip4_fib_get(table_id);
2944           if ((fib) && (fib->index != table_id))
2945               return clib_error_return (0, "<fib-index> %d does not exist",
2946                                         table_id);
2947       }
2948       else if (unformat (input, "count %f", &count))
2949         ;
2950
2951       else if (unformat (input, "%U",
2952                          unformat_ip4_address, &ip4_base_address))
2953         ;
2954       else
2955         return clib_error_return (0, "unknown input `%U'",
2956                                   format_unformat_error, input);
2957   }
2958
2959   n = count;
2960
2961   for (i = 0; i < n; i++)
2962     {
2963       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2964         errors++;
2965
2966       ip4_base_address.as_u32 =
2967         clib_host_to_net_u32 (1 +
2968                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2969     }
2970
2971   if (errors)
2972     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2973   else
2974     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2975
2976   return 0;
2977 }
2978
2979 /*?
2980  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2981  * given FIB table to determine if there is a conflict with the
2982  * adjacency table. The fib-id can be determined by using the
2983  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2984  * of 0 is used.
2985  *
2986  * @todo This command uses fib-id, other commands use table-id (not
2987  * just a name, they are different indexes). Would like to change this
2988  * to table-id for consistency.
2989  *
2990  * @cliexpar
2991  * Example of how to run the test lookup command:
2992  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2993  * No errors in 2 lookups
2994  * @cliexend
2995 ?*/
2996 /* *INDENT-OFF* */
2997 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2998     .path = "test lookup",
2999     .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3000     .function = test_lookup_command_fn,
3001 };
3002 /* *INDENT-ON* */
3003
3004 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3005 {
3006   ip4_main_t * im4 = &ip4_main;
3007   ip4_fib_t * fib;
3008   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3009
3010   if (p == 0)
3011     return VNET_API_ERROR_NO_SUCH_FIB;
3012
3013   fib = ip4_fib_get (p[0]);
3014
3015   fib->flow_hash_config = flow_hash_config;
3016   return 0;
3017 }
3018
3019 static clib_error_t *
3020 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3021                              unformat_input_t * input,
3022                              vlib_cli_command_t * cmd)
3023 {
3024   int matched = 0;
3025   u32 table_id = 0;
3026   u32 flow_hash_config = 0;
3027   int rv;
3028
3029   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3030     if (unformat (input, "table %d", &table_id))
3031       matched = 1;
3032 #define _(a,v) \
3033     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3034     foreach_flow_hash_bit
3035 #undef _
3036     else break;
3037   }
3038
3039   if (matched == 0)
3040     return clib_error_return (0, "unknown input `%U'",
3041                               format_unformat_error, input);
3042
3043   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3044   switch (rv)
3045     {
3046     case 0:
3047       break;
3048
3049     case VNET_API_ERROR_NO_SUCH_FIB:
3050       return clib_error_return (0, "no such FIB table %d", table_id);
3051
3052     default:
3053       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3054       break;
3055     }
3056
3057   return 0;
3058 }
3059
3060 /*?
3061  * Configure the set of IPv4 fields used by the flow hash.
3062  *
3063  * @cliexpar
3064  * Example of how to set the flow hash on a given table:
3065  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3067  * @cliexstart{show ip fib}
3068  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3069  * 0.0.0.0/0
3070  *   unicast-ip4-chain
3071  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3072  *     [0] [@0]: dpo-drop ip6
3073  * 0.0.0.0/32
3074  *   unicast-ip4-chain
3075  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3076  *     [0] [@0]: dpo-drop ip6
3077  * 224.0.0.0/8
3078  *   unicast-ip4-chain
3079  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3080  *     [0] [@0]: dpo-drop ip6
3081  * 6.0.1.2/32
3082  *   unicast-ip4-chain
3083  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3084  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3085  * 7.0.0.1/32
3086  *   unicast-ip4-chain
3087  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3088  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3089  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3090  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3091  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3092  * 240.0.0.0/8
3093  *   unicast-ip4-chain
3094  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3095  *     [0] [@0]: dpo-drop ip6
3096  * 255.255.255.255/32
3097  *   unicast-ip4-chain
3098  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3099  *     [0] [@0]: dpo-drop ip6
3100  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3101  * 0.0.0.0/0
3102  *   unicast-ip4-chain
3103  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3104  *     [0] [@0]: dpo-drop ip6
3105  * 0.0.0.0/32
3106  *   unicast-ip4-chain
3107  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3108  *     [0] [@0]: dpo-drop ip6
3109  * 172.16.1.0/24
3110  *   unicast-ip4-chain
3111  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3112  *     [0] [@4]: ipv4-glean: af_packet0
3113  * 172.16.1.1/32
3114  *   unicast-ip4-chain
3115  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3116  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3117  * 172.16.1.2/32
3118  *   unicast-ip4-chain
3119  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3120  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3121  * 172.16.2.0/24
3122  *   unicast-ip4-chain
3123  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3124  *     [0] [@4]: ipv4-glean: af_packet1
3125  * 172.16.2.1/32
3126  *   unicast-ip4-chain
3127  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3128  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3129  * 224.0.0.0/8
3130  *   unicast-ip4-chain
3131  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3132  *     [0] [@0]: dpo-drop ip6
3133  * 240.0.0.0/8
3134  *   unicast-ip4-chain
3135  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3136  *     [0] [@0]: dpo-drop ip6
3137  * 255.255.255.255/32
3138  *   unicast-ip4-chain
3139  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3140  *     [0] [@0]: dpo-drop ip6
3141  * @cliexend
3142 ?*/
3143 /* *INDENT-OFF* */
3144 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3145   .path = "set ip flow-hash",
3146   .short_help =
3147   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3148   .function = set_ip_flow_hash_command_fn,
3149 };
3150 /* *INDENT-ON* */
3151
3152 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3153                                  u32 table_index)
3154 {
3155   vnet_main_t * vnm = vnet_get_main();
3156   vnet_interface_main_t * im = &vnm->interface_main;
3157   ip4_main_t * ipm = &ip4_main;
3158   ip_lookup_main_t * lm = &ipm->lookup_main;
3159   vnet_classify_main_t * cm = &vnet_classify_main;
3160   ip4_address_t *if_addr;
3161
3162   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3163     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3164
3165   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3166     return VNET_API_ERROR_NO_SUCH_ENTRY;
3167
3168   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3169   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3170
3171   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3172
3173   if (NULL != if_addr)
3174   {
3175       fib_prefix_t pfx = {
3176           .fp_len = 32,
3177           .fp_proto = FIB_PROTOCOL_IP4,
3178           .fp_addr.ip4 = *if_addr,
3179       };
3180       u32 fib_index;
3181
3182       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3183                                                       sw_if_index);
3184
3185
3186       if (table_index != (u32) ~0)
3187       {
3188           dpo_id_t dpo = DPO_INVALID;
3189
3190           dpo_set(&dpo,
3191                   DPO_CLASSIFY,
3192                   DPO_PROTO_IP4,
3193                   classify_dpo_create(DPO_PROTO_IP4, table_index));
3194
3195           fib_table_entry_special_dpo_add(fib_index,
3196                                           &pfx,
3197                                           FIB_SOURCE_CLASSIFY,
3198                                           FIB_ENTRY_FLAG_NONE,
3199                                           &dpo);
3200           dpo_reset(&dpo);
3201       }
3202       else
3203       {
3204           fib_table_entry_special_remove(fib_index,
3205                                          &pfx,
3206                                          FIB_SOURCE_CLASSIFY);
3207       }
3208   }
3209
3210   return 0;
3211 }
3212
3213 static clib_error_t *
3214 set_ip_classify_command_fn (vlib_main_t * vm,
3215                             unformat_input_t * input,
3216                             vlib_cli_command_t * cmd)
3217 {
3218   u32 table_index = ~0;
3219   int table_index_set = 0;
3220   u32 sw_if_index = ~0;
3221   int rv;
3222
3223   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3224     if (unformat (input, "table-index %d", &table_index))
3225       table_index_set = 1;
3226     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3227                        vnet_get_main(), &sw_if_index))
3228       ;
3229     else
3230       break;
3231   }
3232
3233   if (table_index_set == 0)
3234     return clib_error_return (0, "classify table-index must be specified");
3235
3236   if (sw_if_index == ~0)
3237     return clib_error_return (0, "interface / subif must be specified");
3238
3239   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3240
3241   switch (rv)
3242     {
3243     case 0:
3244       break;
3245
3246     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3247       return clib_error_return (0, "No such interface");
3248
3249     case VNET_API_ERROR_NO_SUCH_ENTRY:
3250       return clib_error_return (0, "No such classifier table");
3251     }
3252   return 0;
3253 }
3254
3255 /*?
3256  * Assign a classification table to an interface. The classification
3257  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3259  * on an interface.
3260  *
3261  * @cliexpar
3262  * Example of how to assign a classification table to an interface:
3263  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3264 ?*/
3265 /* *INDENT-OFF* */
3266 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3267     .path = "set ip classify",
3268     .short_help =
3269     "set ip classify intfc <interface> table-index <classify-idx>",
3270     .function = set_ip_classify_command_fn,
3271 };
3272 /* *INDENT-ON* */
3273