80d04ba6d75f58e768bf0a4b93a380fa4ad24704
[vpp.git] / vnet / vnet / sr / sr_replicate.c
1 /*
2  * sr_replicate.c: ipv6 segment routing replicator for multicast
3  *
4  * Copyright (c) 2016 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #if DPDK > 0 /* Cannot run replicate without DPDK */
19 #include <vlib/vlib.h>
20 #include <vnet/vnet.h>
21 #include <vnet/pg/pg.h>
22 #include <vnet/sr/sr.h>
23 #include <vnet/devices/dpdk/dpdk.h>
24 #include <vnet/dpdk_replication.h>
25 #include <vnet/ip/ip.h>
26
27 #include <vppinfra/hash.h>
28 #include <vppinfra/error.h>
29 #include <vppinfra/elog.h>
30
31 typedef struct {
32   /* convenience */
33   vlib_main_t * vlib_main;
34   vnet_main_t * vnet_main;
35 } sr_replicate_main_t;
36
37 sr_replicate_main_t sr_replicate_main;
38
39
40 typedef struct {
41   ip6_address_t src, dst;
42   u16 length;
43   u32 next_index;
44   u32 tunnel_index;
45   u8 sr[256];
46 } sr_replicate_trace_t;
47
48 /* packet trace format function */
49 static u8 * format_sr_replicate_trace (u8 * s, va_list * args)
50 {
51   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
52   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
53   sr_replicate_trace_t * t = va_arg (*args, sr_replicate_trace_t *);
54   ip6_main_t * im = &ip6_main;
55   ip6_sr_main_t * sm = &sr_main;
56   ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
57   ip6_fib_t * rx_fib, * tx_fib;
58
59   rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index,
60                                               IP6_ROUTE_FLAG_FIB_INDEX);
61
62   tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index,
63                                               IP6_ROUTE_FLAG_FIB_INDEX);
64
65   s = format
66     (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n"
67      "           rx-fib-id %d tx-fib-id %d\n%U",
68      "ip6-lookup",
69      format_ip6_address, &t->src,
70      format_ip6_address, &t->dst, t->length,
71      rx_fib->table_id, tx_fib->table_id,
72      format_ip6_sr_header, t->sr, 0 /* print_hmac */);
73   return s;
74
75 }
76
/*
 * Error/counter table for the sr-replicate node.  Each entry is expanded
 * twice: once into an SR_REPLICATE_ERROR_* enumerator and once into the
 * matching human-readable string, so the two always stay in step.
 */
#define foreach_sr_replicate_error                              \
  _(REPLICATED, "sr packets replicated")                        \
  _(NO_BUFFERS, "error allocating buffers for replicas")        \
  _(NO_REPLICAS, "no replicas were needed")                     \
  _(NO_BUFFER_DROPS, "sr no buffer drops")

/* Counter indices; SR_REPLICATE_N_ERROR is the number of counters. */
typedef enum
{
#define _(name, text) SR_REPLICATE_ERROR_##name,
  foreach_sr_replicate_error
#undef _
    SR_REPLICATE_N_ERROR,
} sr_replicate_error_t;

/* Counter descriptions, indexed by sr_replicate_error_t. */
static char *sr_replicate_error_strings[] = {
#define _(name, text) text,
  foreach_sr_replicate_error
#undef _
};
95
/* Next-node indices for sr-replicate; SR_REPLICATE_N_NEXT is the count. */
typedef enum
{
  SR_REPLICATE_NEXT_IP6_LOOKUP = 0,
  SR_REPLICATE_N_NEXT
} sr_replicate_next_t;
100
101 static uword
102 sr_replicate_node_fn (vlib_main_t * vm,
103                   vlib_node_runtime_t * node,
104                   vlib_frame_t * frame)
105 {
106   u32 n_left_from, * from, * to_next;
107   sr_replicate_next_t next_index;
108   int pkts_replicated = 0;
109   ip6_sr_main_t * sm = &sr_main;
110   int no_buffer_drops = 0;
111   vlib_buffer_free_list_t * fl;
112   unsigned socket_id = rte_socket_id();
113   vlib_buffer_main_t * bm = vm->buffer_main;
114
115   from = vlib_frame_vector_args (frame);
116   n_left_from = frame->n_vectors;
117   next_index = node->cached_next_index;
118
119   fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
120
121   while (n_left_from > 0)
122     {
123       u32 n_left_to_next;
124
125       vlib_get_next_frame (vm, node, next_index,
126                            to_next, n_left_to_next);
127
128       while (n_left_from > 0 && n_left_to_next > 0)
129         {
130           u32 bi0, hdr_bi0;
131           vlib_buffer_t * b0, * orig_b0;
132           struct rte_mbuf * orig_mb0 = 0, * hdr_mb0 = 0, * clone0 = 0;
133           struct rte_mbuf ** hdr_vec = 0, ** rte_mbuf_vec = 0;
134           ip6_sr_policy_t * pol0 = 0;
135           ip6_sr_tunnel_t * t0 = 0;
136           ip6_sr_header_t * hdr_sr0 = 0;
137           ip6_header_t * ip0 = 0, * hdr_ip0 = 0;
138           int num_replicas = 0;
139           int i;
140
141           bi0 = from[0];
142
143           b0 = vlib_get_buffer (vm, bi0);
144           orig_b0 = b0;
145
146           pol0 = pool_elt_at_index (sm->policies,
147                                     vnet_buffer(b0)->ip.save_protocol);
148
149           ip0 = vlib_buffer_get_current (b0);
150           /* Skip forward to the punch-in point */
151           vlib_buffer_advance (b0, sizeof(*ip0));
152
153           orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
154
155           i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0)
156             - (i16) orig_mb0->pkt_len;
157
158           u16 new_data_len0 = (u16)((i16) orig_mb0->data_len + delta0);
159           u16 new_pkt_len0  = (u16)((i16) orig_mb0->pkt_len + delta0);
160
161           orig_mb0->data_len = new_data_len0;
162           orig_mb0->pkt_len = new_pkt_len0;
163           orig_mb0->data_off = (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
164
165           /*
166             Before entering loop determine if we can allocate:
167             - all the new HEADER RTE_MBUFs and assign them to a vector
168             - all the clones
169
170             if successful, then iterate over vectors of resources
171
172            */
173           num_replicas = vec_len (pol0->tunnel_indices);
174
175           if (PREDICT_FALSE(num_replicas == 0))
176             {
177               b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS];
178               goto do_trace0;
179             }
180
181           vec_reset_length (hdr_vec);
182           vec_reset_length (rte_mbuf_vec);
183
184           for (i=0; i < num_replicas; i++)
185             {
186               hdr_mb0 = rte_pktmbuf_alloc(bm->pktmbuf_pools[socket_id]);
187
188               if (i < (num_replicas - 1) )
189                 /* Not the last tunnel to process */
190                 clone0 = rte_pktmbuf_clone
191                     (orig_mb0, bm->pktmbuf_pools[socket_id]);
192               else
193                   /* Last tunnel to process, use original MB */
194                 clone0 = orig_mb0;
195               
196               
197               if (PREDICT_FALSE( !clone0 || !hdr_mb0 ))
198                 {
199                   b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS];
200                   
201                   vec_foreach_index (i, rte_mbuf_vec) 
202                     { 
203                       rte_pktmbuf_free(rte_mbuf_vec[i]); 
204                     } 
205                   vec_free (rte_mbuf_vec);
206                   
207                   vec_foreach_index (i, hdr_vec) 
208                     { 
209                       rte_pktmbuf_free(hdr_vec[i]); 
210                     } 
211                   vec_free (hdr_vec);
212
213                   goto do_trace0;
214                 }
215
216               vec_add1 (hdr_vec, hdr_mb0);
217               vec_add1 (rte_mbuf_vec, clone0);
218
219             }
220
221           for (i=0; i < num_replicas; i++)
222             {
223               vlib_buffer_t * hdr_b0;
224
225               t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
226
227               /* Our replicas */
228               hdr_mb0 = hdr_vec[i];
229               clone0 = rte_mbuf_vec[i];
230
231               hdr_mb0->data_len = sizeof (*ip0) + vec_len (t0->rewrite);
232               hdr_mb0->pkt_len = hdr_mb0->data_len +
233                   vlib_buffer_length_in_chain (vm, orig_b0);
234
235               hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0);
236
237               vlib_buffer_init_for_free_list (hdr_b0, fl);
238
239               memcpy (hdr_b0->data, ip0, sizeof (*ip0));
240               memcpy (hdr_b0->data + sizeof (*ip0), t0->rewrite,
241                       vec_len (t0->rewrite));
242
243               hdr_b0->current_data = 0;
244               hdr_b0->current_length = sizeof (*ip0) + vec_len (t0->rewrite);
245               hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
246
247
248               hdr_b0->total_length_not_including_first_buffer =
249                 hdr_mb0->pkt_len - hdr_b0->current_length;
250
251               hdr_ip0 = (ip6_header_t *) hdr_b0->data;
252               hdr_ip0->payload_length = clib_host_to_net_u16(hdr_mb0->data_len);
253               hdr_sr0 = (ip6_sr_header_t *) (hdr_ip0+1);
254               hdr_sr0->protocol = hdr_ip0->protocol;
255               hdr_ip0->protocol = 43;
256
257               /* Rewrite the ip6 dst address */
258               hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
259               hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
260
261               sr_fix_hmac (sm, hdr_ip0, hdr_sr0);
262
263               /* prepend new header to invariant piece */
264               hdr_mb0->next = clone0;
265               hdr_b0->next_buffer = vlib_get_buffer_index (vm, vlib_buffer_from_rte_mbuf (clone0));
266
267               /* update header's fields */
268               hdr_mb0->pkt_len = (uint16_t)(hdr_mb0->data_len + clone0->pkt_len);
269               hdr_mb0->nb_segs = (uint8_t)(clone0->nb_segs + 1);
270
271               /* copy metadata from source packet*/
272               hdr_mb0->port = clone0->port;
273               hdr_mb0->vlan_tci = clone0->vlan_tci;
274               hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer;
275               hdr_mb0->tx_offload = clone0->tx_offload;
276               hdr_mb0->hash = clone0->hash;
277
278               hdr_mb0->ol_flags = clone0->ol_flags;
279
280               __rte_mbuf_sanity_check(hdr_mb0, 1);
281
282               hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0);
283
284               to_next[0] = hdr_bi0;
285               to_next += 1;
286               n_left_to_next -= 1;
287
288               if (n_left_to_next == 0)
289                 {
290                   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
291                   vlib_get_next_frame (vm, node, next_index,
292                                        to_next, n_left_to_next);
293
294                 }
295               pkts_replicated++;
296             }
297
298           from += 1;
299           n_left_from -= 1;
300
301         do_trace0:
302           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
303             {
304               sr_replicate_trace_t *tr = vlib_add_trace (vm, node,
305                                                        b0, sizeof (*tr));
306               tr->tunnel_index = t0 - sm->tunnels;
307               if (hdr_ip0)
308                 {
309                   memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8,
310                       sizeof (tr->src.as_u8));
311                   memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8,
312                       sizeof (tr->dst.as_u8));
313                 }
314               if (hdr_ip0->payload_length)
315                 tr->length = clib_net_to_host_u16(hdr_ip0->payload_length);
316               else
317                 tr->length = 0;
318               tr->next_index = next_index;
319               memcpy (tr->sr, hdr_sr0, sizeof (tr->sr));
320             }
321
322         }
323
324       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
325     }
326
327   vlib_node_increment_counter (vm, sr_replicate_node.index,
328                                SR_REPLICATE_ERROR_REPLICATED, pkts_replicated);
329
330   vlib_node_increment_counter (vm, sr_replicate_node.index,
331                                SR_REPLICATE_ERROR_NO_BUFFER_DROPS, no_buffer_drops);
332
333   return frame->n_vectors;
334 }
335
336 VLIB_REGISTER_NODE (sr_replicate_node) = {
337   .function = sr_replicate_node_fn,
338   .name = "sr-replicate",
339   .vector_size = sizeof (u32),
340   .format_trace = format_sr_replicate_trace,
341   .type = VLIB_NODE_TYPE_INTERNAL,
342
343   .n_errors = ARRAY_LEN(sr_replicate_error_strings),
344   .error_strings = sr_replicate_error_strings,
345
346   .n_next_nodes = SR_REPLICATE_N_NEXT,
347
348   .next_nodes = {
349         [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup",
350   },
351 };
352
353 clib_error_t *sr_replicate_init (vlib_main_t *vm)
354 {
355   sr_replicate_main_t *msm = &sr_replicate_main;
356
357   msm->vlib_main = vm;
358   msm->vnet_main = vnet_get_main();
359
360   return 0;
361 }
362
363 VLIB_INIT_FUNCTION(sr_replicate_init);
364
365 #endif /* DPDK */