/*
 * ip4_lookup_inline: leverage vlib_get_buffers to improve perf
 * [vpp.git] / src / vnet / ip / ip4_forward.h
 */
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.h: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #ifndef __included_ip4_forward_h__
41 #define __included_ip4_forward_h__
42
43 #include <vppinfra/cache.h>
44 #include <vnet/fib/ip4_fib.h>
45 #include <vnet/dpo/load_balance_map.h>
46
47 /**
48  * @file
49  * @brief IPv4 Forwarding.
50  *
51  * This file contains the source code for IPv4 forwarding.
52  */
53
/**
 * @brief IPv4 FIB lookup for a full frame of buffers.
 *
 * For each packet in @a frame, resolves the destination address to a
 * load-balance object, selects a DPO bucket (using the flow hash when the
 * load-balance has more than one bucket), stores the chosen DPO index in
 * the buffer's VLIB_TX adjacency slot, bumps the per-load-balance
 * counters, and enqueues the buffer to the DPO's next node.
 *
 * @param vm     vlib main for this thread
 * @param node   this node's runtime
 * @param frame  frame of buffer indices to process
 * @param lookup_for_responses_to_locally_received_packets
 *        Compile-time-constant template flag (callers pass a literal):
 *        when non-zero, skip the mtrie walk entirely and take the
 *        load-balance index from the buffer's VLIB_RX adjacency slot,
 *        which a prior node filled in; when zero, perform the full
 *        three-step mtrie lookup on the destination address.
 *
 * @return frame->n_vectors (all packets are consumed).
 *
 * Processing is unrolled 4-wide or 2-wide depending on how many
 * prefetch slots the target CPU offers (CLIB_N_PREFETCHES), with a
 * scalar loop handling the remainder.
 */
always_inline uword
ip4_lookup_inline (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * frame,
                   int lookup_for_responses_to_locally_received_packets)
{
  ip4_main_t *im = &ip4_main;
  /* Per-load-balance "to" counters, indexed by lb index. */
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
  u32 n_left_from, n_left_to_next, *from, *to_next;
  ip_lookup_next_t next;
  u32 thread_index = vm->thread_index;
  /* Buffer pointers for the whole frame, translated up front so the
     loops below avoid per-packet index-to-pointer conversion. */
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
  vlib_buffer_t **b = bufs;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next = node->cached_next_index;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);

#if (CLIB_N_PREFETCHES >= 8)
      /* 4-wide unrolled loop: needs 8 packets in flight so the next
         quad can be prefetched while the current one is processed. */
      while (n_left_from >= 8 && n_left_to_next >= 4)
        {
          vlib_buffer_t *p0, *p1, *p2, *p3;
          ip4_header_t *ip0, *ip1, *ip2, *ip3;
          ip_lookup_next_t next0, next1, next2, next3;
          const load_balance_t *lb0, *lb1, *lb2, *lb3;
          ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
          ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
          ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
          u32 pi0, pi1, pi2, pi3, lb_index0, lb_index1, lb_index2, lb_index3;
          flow_hash_config_t flow_hash_config0, flow_hash_config1;
          flow_hash_config_t flow_hash_config2, flow_hash_config3;
          u32 hash_c0, hash_c1, hash_c2, hash_c3;
          const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;

          /* Prefetch next iteration. */
          {
            vlib_prefetch_buffer_header (b[4], LOAD);
            vlib_prefetch_buffer_header (b[5], LOAD);
            vlib_prefetch_buffer_header (b[6], LOAD);
            vlib_prefetch_buffer_header (b[7], LOAD);

            /* Prefetch the packet data (IP header) as well. */
            CLIB_PREFETCH (b[4]->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (b[5]->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (b[6]->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (b[7]->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively enqueue the quad to the cached next;
             vlib_validate_buffer_enqueue_x4 below fixes up any
             packet whose computed next differs. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          pi2 = to_next[2] = from[2];
          pi3 = to_next[3] = from[3];

          from += 4;
          to_next += 4;
          n_left_to_next -= 4;
          n_left_from -= 4;

          p0 = b[0];
          p1 = b[1];
          p2 = b[2];
          p3 = b[3];
          b += 4;

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);
          ip2 = vlib_buffer_get_current (p2);
          ip3 = vlib_buffer_get_current (p3);

          dst_addr0 = &ip0->dst_address;
          dst_addr1 = &ip1->dst_address;
          dst_addr2 = &ip2->dst_address;
          dst_addr3 = &ip3->dst_address;

          /* Resolve and stash the FIB index for each buffer's RX
             sw_if_index into the buffer metadata (read back below). */
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p0);
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p1);
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p2);
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p3);

          /* Mtrie walk: three dependent lookup steps.  The predicate is
             a compile-time constant, so in the "responses to local
             packets" specialization all three blocks vanish.  Keeping
             the steps in separate blocks groups the four packets'
             loads at each trie level, so they can miss in parallel. */
          if (!lookup_for_responses_to_locally_received_packets)
            {
              mtrie0 = &ip4_fib_get (vnet_buffer (p0)->ip.fib_index)->mtrie;
              mtrie1 = &ip4_fib_get (vnet_buffer (p1)->ip.fib_index)->mtrie;
              mtrie2 = &ip4_fib_get (vnet_buffer (p2)->ip.fib_index)->mtrie;
              mtrie3 = &ip4_fib_get (vnet_buffer (p3)->ip.fib_index)->mtrie;

              leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
              leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
              leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
              leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
            }

          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
              leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
              leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
              leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
            }

          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
              leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
              leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
              leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
            }

          if (lookup_for_responses_to_locally_received_packets)
            {
              /* A previous node already resolved the load balance;
                 it lives in the RX adjacency slot. */
              lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
              lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
              lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
              lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
            }
          else
            {
              lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
              lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
              lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
              lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
            }

          /* Index 0 is never a valid load balance. */
          ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);
          lb2 = load_balance_get (lb_index2);
          lb3 = load_balance_get (lb_index3);

          /* Power-of-2 bucket counts allow masking instead of modulo. */
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb1->lb_n_buckets));
          ASSERT (lb2->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb2->lb_n_buckets));
          ASSERT (lb3->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb3->lb_n_buckets));

          /* Use flow hash to compute multipath adjacency. */
          hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
          hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
          hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
          hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
          /* Multi-bucket (ECMP) is the uncommon case: only then is the
             flow hash computed and used to pick a forwarding bucket. */
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;
              hash_c0 = vnet_buffer (p0)->ip.flow_hash =
                ip4_compute_flow_hash (ip0, flow_hash_config0);
              dpo0 =
                load_balance_get_fwd_bucket (lb0,
                                             (hash_c0 &
                                              (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              flow_hash_config1 = lb1->lb_hash_config;
              hash_c1 = vnet_buffer (p1)->ip.flow_hash =
                ip4_compute_flow_hash (ip1, flow_hash_config1);
              dpo1 =
                load_balance_get_fwd_bucket (lb1,
                                             (hash_c1 &
                                              (lb1->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
          if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
            {
              flow_hash_config2 = lb2->lb_hash_config;
              hash_c2 = vnet_buffer (p2)->ip.flow_hash =
                ip4_compute_flow_hash (ip2, flow_hash_config2);
              dpo2 =
                load_balance_get_fwd_bucket (lb2,
                                             (hash_c2 &
                                              (lb2->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo2 = load_balance_get_bucket_i (lb2, 0);
            }
          if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
            {
              flow_hash_config3 = lb3->lb_hash_config;
              hash_c3 = vnet_buffer (p3)->ip.flow_hash =
                ip4_compute_flow_hash (ip3, flow_hash_config3);
              dpo3 =
                load_balance_get_fwd_bucket (lb3,
                                             (hash_c3 &
                                              (lb3->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo3 = load_balance_get_bucket_i (lb3, 0);
            }

          /* Hand the chosen DPO to the next node via the TX slot. */
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          next1 = dpo1->dpoi_next_node;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
          next2 = dpo2->dpoi_next_node;
          vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
          next3 = dpo3->dpoi_next_node;
          vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;

          vlib_increment_combined_counter
            (cm, thread_index, lb_index0, 1,
             vlib_buffer_length_in_chain (vm, p0));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index1, 1,
             vlib_buffer_length_in_chain (vm, p1));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index2, 1,
             vlib_buffer_length_in_chain (vm, p2));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index3, 1,
             vlib_buffer_length_in_chain (vm, p3));

          /* Repair the speculative enqueue for any packet whose
             next index differs from the current frame's. */
          vlib_validate_buffer_enqueue_x4 (vm, node, next,
                                           to_next, n_left_to_next,
                                           pi0, pi1, pi2, pi3,
                                           next0, next1, next2, next3);
        }
#elif (CLIB_N_PREFETCHES >= 4)
      /* 2-wide unrolled loop (same structure as the quad loop above,
         for CPUs with fewer prefetch slots). */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          ip4_header_t *ip0, *ip1;
          ip_lookup_next_t next0, next1;
          const load_balance_t *lb0, *lb1;
          ip4_fib_mtrie_t *mtrie0, *mtrie1;
          ip4_fib_mtrie_leaf_t leaf0, leaf1;
          ip4_address_t *dst_addr0, *dst_addr1;
          u32 pi0, pi1, lb_index0, lb_index1;
          flow_hash_config_t flow_hash_config0, flow_hash_config1;
          u32 hash_c0, hash_c1;
          const dpo_id_t *dpo0, *dpo1;

          /* Prefetch next iteration. */
          {
            vlib_prefetch_buffer_header (b[2], LOAD);
            vlib_prefetch_buffer_header (b[3], LOAD);

            CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculative enqueue; fixed up by the x2 validate below. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          p0 = b[0];
          p1 = b[1];
          b += 2;

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          dst_addr0 = &ip0->dst_address;
          dst_addr1 = &ip1->dst_address;

          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p0);
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p1);

          /* Three-step mtrie walk; compiled out entirely in the
             "responses to local packets" specialization. */
          if (!lookup_for_responses_to_locally_received_packets)
            {
              mtrie0 = &ip4_fib_get (vnet_buffer (p0)->ip.fib_index)->mtrie;
              mtrie1 = &ip4_fib_get (vnet_buffer (p1)->ip.fib_index)->mtrie;

              leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
              leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
            }

          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
              leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
            }

          if (!lookup_for_responses_to_locally_received_packets)
            {
              leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
              leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
            }

          if (lookup_for_responses_to_locally_received_packets)
            {
              /* Load-balance index pre-resolved into the RX slot. */
              lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
              lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
            }
          else
            {
              lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
              lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
            }

          ASSERT (lb_index0 && lb_index1);
          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);

          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb1->lb_n_buckets));

          /* Use flow hash to compute multipath adjacency. */
          hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
          hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;
              hash_c0 = vnet_buffer (p0)->ip.flow_hash =
                ip4_compute_flow_hash (ip0, flow_hash_config0);
              dpo0 =
                load_balance_get_fwd_bucket (lb0,
                                             (hash_c0 &
                                              (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              flow_hash_config1 = lb1->lb_hash_config;
              hash_c1 = vnet_buffer (p1)->ip.flow_hash =
                ip4_compute_flow_hash (ip1, flow_hash_config1);
              dpo1 =
                load_balance_get_fwd_bucket (lb1,
                                             (hash_c1 &
                                              (lb1->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo1 = load_balance_get_bucket_i (lb1, 0);
            }

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          next1 = dpo1->dpoi_next_node;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          vlib_increment_combined_counter
            (cm, thread_index, lb_index0, 1,
             vlib_buffer_length_in_chain (vm, p0));
          vlib_increment_combined_counter
            (cm, thread_index, lb_index1, 1,
             vlib_buffer_length_in_chain (vm, p1));

          vlib_validate_buffer_enqueue_x2 (vm, node, next,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }
#endif
      /* Scalar loop: remainder packets (and the whole frame when
         neither unrolled variant was compiled in). */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          ip4_header_t *ip0;
          ip_lookup_next_t next0;
          const load_balance_t *lb0;
          ip4_fib_mtrie_t *mtrie0;
          ip4_fib_mtrie_leaf_t leaf0;
          ip4_address_t *dst_addr0;
          u32 pi0, lbi0;
          flow_hash_config_t flow_hash_config0;
          const dpo_id_t *dpo0;
          u32 hash_c0;

          /* Speculatively enqueue to the current next frame; undone
             below if next0 turns out to differ. */
          pi0 = from[0];
          to_next[0] = pi0;

          p0 = b[0];
          b += 1;

          ip0 = vlib_buffer_get_current (p0);
          dst_addr0 = &ip0->dst_address;
          ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, p0);

          if (!lookup_for_responses_to_locally_received_packets)
            {
              mtrie0 = &ip4_fib_get (vnet_buffer (p0)->ip.fib_index)->mtrie;
              leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
            }

          if (!lookup_for_responses_to_locally_received_packets)
            leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);

          if (!lookup_for_responses_to_locally_received_packets)
            leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);

          if (lookup_for_responses_to_locally_received_packets)
            lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
          else
            {
              /* Handle default route. */
              lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
            }

          ASSERT (lbi0);
          lb0 = load_balance_get (lbi0);

          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));

          /* Use flow hash to compute multipath adjacency. */
          hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;

              hash_c0 = vnet_buffer (p0)->ip.flow_hash =
                ip4_compute_flow_hash (ip0, flow_hash_config0);
              dpo0 =
                load_balance_get_fwd_bucket (lb0,
                                             (hash_c0 &
                                              (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
                                           vlib_buffer_length_in_chain (vm,
                                                                        p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          if (PREDICT_FALSE (next0 != next))
            {
              /* Wrong frame: back out the speculative enqueue, close
                 the current frame, switch to next0's frame, and
                 re-enqueue the packet there. */
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
520
521 #endif /* __included_ip4_forward_h__ */
522
523 /*
524  * fd.io coding-style-patch-verification: ON
525  *
526  * Local Variables:
527  * eval: (c-set-style "gnu")
528  * End:
529  */