Dual Loop Load-Balance Nodes
[vpp.git] / vnet / vnet / mpls / mpls_lookup.c
1 /*
2  * mpls_lookup.c: MPLS lookup
3  *
4  * Copyright (c) 2012-2014 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vlib/vlib.h>
19 #include <vnet/pg/pg.h>
20 #include <vnet/mpls/mpls.h>
21 #include <vnet/fib/mpls_fib.h>
22 #include <vnet/dpo/load_balance.h>
23
24 vlib_node_registration_t mpls_lookup_node;
25
26 typedef struct {
27   u32 next_index;
28   u32 lb_index;
29   u32 lfib_index;
30   u32 label_net_byte_order;
31   u32 hash;
32 } mpls_lookup_trace_t;
33
34 static u8 *
35 format_mpls_lookup_trace (u8 * s, va_list * args)
36 {
37   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
38   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
39   mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
40
41   s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d"
42               "label %d eos %d", 
43               t->next_index, t->lfib_index, t->lb_index, t->hash,
44               vnet_mpls_uc_get_label(
45                   clib_net_to_host_u32(t->label_net_byte_order)),
46               vnet_mpls_uc_get_s(t->label_net_byte_order));
47   return s;
48 }
49
50 /*
51  * Compute flow hash. 
52  * We'll use it to select which adjacency to use for this flow.  And other things.
53  */
54 always_inline u32
55 mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
56                         flow_hash_config_t flow_hash_config)
57 {
58     // FIXME
59     return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl));
60 }
61
62 static inline uword
63 mpls_lookup (vlib_main_t * vm,
64              vlib_node_runtime_t * node,
65              vlib_frame_t * from_frame)
66 {
67   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
68   u32 n_left_from, next_index, * from, * to_next;
69   mpls_main_t * mm = &mpls_main;
70   u32 cpu_index = os_get_cpu_number();
71
72   from = vlib_frame_vector_args (from_frame);
73   n_left_from = from_frame->n_vectors;
74   next_index = node->cached_next_index;
75
76   while (n_left_from > 0)
77     {
78       u32 n_left_to_next;
79
80       vlib_get_next_frame (vm, node, next_index,
81                            to_next, n_left_to_next);
82
83       while (n_left_from >= 4 && n_left_to_next >= 2)
84         {
85           u32 lbi0, next0, lfib_index0, bi0, hash_c0;
86           const mpls_unicast_header_t * h0;
87           const load_balance_t *lb0;
88           const dpo_id_t *dpo0;
89           vlib_buffer_t * b0;
90           u32 lbi1, next1, lfib_index1, bi1, hash_c1;
91           const mpls_unicast_header_t * h1;
92           const load_balance_t *lb1;
93           const dpo_id_t *dpo1;
94           vlib_buffer_t * b1;
95
96            /* Prefetch next iteration. */
97           {
98             vlib_buffer_t * p2, * p3;
99
100             p2 = vlib_get_buffer (vm, from[2]);
101             p3 = vlib_get_buffer (vm, from[3]);
102
103             vlib_prefetch_buffer_header (p2, STORE);
104             vlib_prefetch_buffer_header (p3, STORE);
105
106             CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
107             CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
108           }
109
110           bi0 = to_next[0] = from[0];
111           bi1 = to_next[1] = from[1];
112
113           from += 2;
114           n_left_from -= 2;
115           to_next += 2;
116           n_left_to_next -= 2;
117
118           b0 = vlib_get_buffer (vm, bi0);
119           b1 = vlib_get_buffer (vm, bi1);
120           h0 = vlib_buffer_get_current (b0);
121           h1 = vlib_buffer_get_current (b1);
122
123           lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
124                                 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
125           lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
126                                 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
127
128           lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
129           lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
130           lb0 = load_balance_get(lbi0);
131           lb1 = load_balance_get(lbi1);
132
133           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
134           hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
135
136           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
137           {
138               hash_c0 = vnet_buffer (b0)->ip.flow_hash =
139                   mpls_compute_flow_hash(h0, lb0->lb_hash_config);
140           }
141           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
142           {
143               hash_c1 = vnet_buffer (b1)->ip.flow_hash =
144                   mpls_compute_flow_hash(h1, lb1->lb_hash_config);
145           }
146
147           ASSERT (lb0->lb_n_buckets > 0);
148           ASSERT (is_pow2 (lb0->lb_n_buckets));
149           ASSERT (lb1->lb_n_buckets > 0);
150           ASSERT (is_pow2 (lb1->lb_n_buckets));
151
152           dpo0 = load_balance_get_bucket_i(lb0,
153                                            (hash_c0 &
154                                             (lb0->lb_n_buckets_minus_1)));
155           dpo1 = load_balance_get_bucket_i(lb1,
156                                            (hash_c1 &
157                                             (lb1->lb_n_buckets_minus_1)));
158
159           next0 = dpo0->dpoi_next_node;
160           next1 = dpo1->dpoi_next_node;
161
162           vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
163           vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
164
165           vlib_increment_combined_counter
166               (cm, cpu_index, lbi0, 1,
167                vlib_buffer_length_in_chain (vm, b0));
168           vlib_increment_combined_counter
169               (cm, cpu_index, lbi1, 1,
170                vlib_buffer_length_in_chain (vm, b1));
171
172           /*
173            * pop the label that was just used in the lookup
174            */
175           vlib_buffer_advance(b0, sizeof(*h0));
176           vlib_buffer_advance(b1, sizeof(*h1));
177
178           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
179           {
180               mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
181                                                         b0, sizeof (*tr));
182               tr->next_index = next0;
183               tr->lb_index = lbi0;
184               tr->lfib_index = lfib_index0;
185               tr->hash = hash_c0;
186               tr->label_net_byte_order = h0->label_exp_s_ttl;
187           }
188
189           if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
190           {
191               mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
192                                                         b1, sizeof (*tr));
193               tr->next_index = next1;
194               tr->lb_index = lbi1;
195               tr->lfib_index = lfib_index1;
196               tr->hash = hash_c1;
197               tr->label_net_byte_order = h1->label_exp_s_ttl;
198           }
199
200           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
201                                            to_next, n_left_to_next,
202                                            bi0, bi1, next0, next1);
203         }
204
205       while (n_left_from > 0 && n_left_to_next > 0)
206       {
207           u32 lbi0, next0, lfib_index0, bi0, hash_c0;
208           const mpls_unicast_header_t * h0;
209           const load_balance_t *lb0;
210           const dpo_id_t *dpo0;
211           vlib_buffer_t * b0;
212
213           bi0 = from[0];
214           to_next[0] = bi0;
215           from += 1;
216           to_next += 1;
217           n_left_from -= 1;
218           n_left_to_next -= 1;
219
220           b0 = vlib_get_buffer (vm, bi0);
221           h0 = vlib_buffer_get_current (b0);
222
223           lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
224                                 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
225
226           lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
227           lb0 = load_balance_get(lbi0);
228
229           hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
230           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
231           {
232               hash_c0 = vnet_buffer (b0)->ip.flow_hash =
233                   mpls_compute_flow_hash(h0, lb0->lb_hash_config);
234           }
235
236           ASSERT (lb0->lb_n_buckets > 0);
237           ASSERT (is_pow2 (lb0->lb_n_buckets));
238
239           dpo0 = load_balance_get_bucket_i(lb0,
240                                            (hash_c0 &
241                                             (lb0->lb_n_buckets_minus_1)));
242
243           next0 = dpo0->dpoi_next_node;
244           vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
245
246           vlib_increment_combined_counter
247               (cm, cpu_index, lbi0, 1,
248                vlib_buffer_length_in_chain (vm, b0));
249
250           /*
251            * pop the label that was just used in the lookup
252            */
253           vlib_buffer_advance(b0, sizeof(*h0));
254
255           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
256           {
257               mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
258                                                         b0, sizeof (*tr));
259               tr->next_index = next0;
260               tr->lb_index = lbi0;
261               tr->lfib_index = lfib_index0;
262               tr->hash = hash_c0;
263               tr->label_net_byte_order = h0->label_exp_s_ttl;
264           }
265
266           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
267                                            to_next, n_left_to_next,
268                                            bi0, next0);
269         }
270
271       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
272     }
273   vlib_node_increment_counter (vm, mpls_lookup_node.index,
274                                MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
275   return from_frame->n_vectors;
276 }
277
278 static char * mpls_error_strings[] = {
279 #define mpls_error(n,s) s,
280 #include "error.def"
281 #undef mpls_error
282 };
283
284 VLIB_REGISTER_NODE (mpls_lookup_node) = {
285   .function = mpls_lookup,
286   .name = "mpls-lookup",
287   /* Takes a vector of packets. */
288   .vector_size = sizeof (u32),
289   .n_errors = MPLS_N_ERROR,
290   .error_strings = mpls_error_strings,
291
292   .sibling_of = "ip4-lookup",
293
294   .format_buffer = format_mpls_header,
295   .format_trace = format_mpls_lookup_trace,
296   .unformat_buffer = unformat_mpls_header,
297 };
298
299 VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup)
300
301 typedef struct {
302   u32 next_index;
303   u32 lb_index;
304   u32 hash;
305 } mpls_load_balance_trace_t;
306
307 static u8 *
308 format_mpls_load_balance_trace (u8 * s, va_list * args)
309 {
310   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
311   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
312   mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
313
314   s = format (s, "MPLS: next [%d], LB index %d hash %d",
315               t->next_index, t->lb_index, t->hash);
316   return s;
317 }
318
319 always_inline uword
320 mpls_load_balance (vlib_main_t * vm,
321                   vlib_node_runtime_t * node,
322                   vlib_frame_t * frame)
323 {
324   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
325   u32 n_left_from, n_left_to_next, * from, * to_next;
326   u32 cpu_index = os_get_cpu_number();
327   u32 next;
328
329   from = vlib_frame_vector_args (frame);
330   n_left_from = frame->n_vectors;
331   next = node->cached_next_index;
332
333   while (n_left_from > 0)
334     {
335       vlib_get_next_frame (vm, node, next,
336                            to_next, n_left_to_next);
337
338
339       while (n_left_from >= 4 && n_left_to_next >= 2)
340         {
341           mpls_lookup_next_t next0, next1;
342           const load_balance_t *lb0, *lb1;
343           vlib_buffer_t * p0, *p1;
344           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
345           const mpls_unicast_header_t *mpls0, *mpls1;
346           const dpo_id_t *dpo0, *dpo1;
347
348           /* Prefetch next iteration. */
349           {
350             vlib_buffer_t * p2, * p3;
351
352             p2 = vlib_get_buffer (vm, from[2]);
353             p3 = vlib_get_buffer (vm, from[3]);
354
355             vlib_prefetch_buffer_header (p2, STORE);
356             vlib_prefetch_buffer_header (p3, STORE);
357
358             CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE);
359             CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE);
360           }
361
362           pi0 = to_next[0] = from[0];
363           pi1 = to_next[1] = from[1];
364
365           from += 2;
366           n_left_from -= 2;
367           to_next += 2;
368           n_left_to_next -= 2;
369
370           p0 = vlib_get_buffer (vm, pi0);
371           p1 = vlib_get_buffer (vm, pi1);
372
373           mpls0 = vlib_buffer_get_current (p0);
374           mpls1 = vlib_buffer_get_current (p1);
375           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
376           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
377
378           lb0 = load_balance_get(lbi0);
379           lb1 = load_balance_get(lbi1);
380
381           /*
382            * this node is for via FIBs we can re-use the hash value from the
383            * to node if present.
384            * We don't want to use the same hash value at each level in the recursion
385            * graph as that would lead to polarisation
386            */
387           hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
388           hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
389
390           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
391           {
392               if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
393               {
394                   hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
395               }
396               else
397               {
398                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
399               }
400           }
401           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
402           {
403               if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
404               {
405                   hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
406               }
407               else
408               {
409                   hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
410               }
411           }
412
413           dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
414           dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
415
416           next0 = dpo0->dpoi_next_node;
417           next1 = dpo1->dpoi_next_node;
418
419           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
420           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
421
422           vlib_increment_combined_counter
423               (cm, cpu_index, lbi0, 1,
424                vlib_buffer_length_in_chain (vm, p0));
425           vlib_increment_combined_counter
426               (cm, cpu_index, lbi1, 1,
427                vlib_buffer_length_in_chain (vm, p1));
428
429           if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
430           {
431               mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
432                                                               p0, sizeof (*tr));
433               tr->next_index = next0;
434               tr->lb_index = lbi0;
435               tr->hash = hc0;
436           }
437
438           vlib_validate_buffer_enqueue_x2 (vm, node, next,
439                                            to_next, n_left_to_next,
440                                            pi0, pi1, next0, next1);
441        }
442
443       while (n_left_from > 0 && n_left_to_next > 0)
444         {
445           mpls_lookup_next_t next0;
446           const load_balance_t *lb0;
447           vlib_buffer_t * p0;
448           u32 pi0, lbi0, hc0;
449           const mpls_unicast_header_t *mpls0;
450           const dpo_id_t *dpo0;
451
452           pi0 = from[0];
453           to_next[0] = pi0;
454           from += 1;
455           to_next += 1;
456           n_left_to_next -= 1;
457           n_left_from -= 1;
458
459           p0 = vlib_get_buffer (vm, pi0);
460
461           mpls0 = vlib_buffer_get_current (p0);
462           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
463
464           lb0 = load_balance_get(lbi0);
465
466           hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
467           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
468           {
469               if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
470               {
471                   hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
472               }
473               else
474               {
475                   hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
476               }
477           }
478
479           dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
480
481           next0 = dpo0->dpoi_next_node;
482           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
483
484           vlib_increment_combined_counter
485               (cm, cpu_index, lbi0, 1,
486                vlib_buffer_length_in_chain (vm, p0));
487
488           vlib_validate_buffer_enqueue_x1 (vm, node, next,
489                                            to_next, n_left_to_next,
490                                            pi0, next0);
491         }
492
493       vlib_put_next_frame (vm, node, next, n_left_to_next);
494     }
495
496   return frame->n_vectors;
497 }
498
499 VLIB_REGISTER_NODE (mpls_load_balance_node) = {
500   .function = mpls_load_balance,
501   .name = "mpls-load-balance",
502   .vector_size = sizeof (u32),
503   .sibling_of = "mpls-lookup",
504
505   .format_trace = format_mpls_load_balance_trace,
506 };
507
508 VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)