2 * sr_replicate.c: ipv6 segment routing replicator for multicast
4 * Copyright (c) 2016 Cisco and/or its affiliates.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
19 * @brief Functions for replicating packets across SR tunnels.
21 * Leverages rte_pktmbuf_clone() so there is no memcpy for
22 * invariant parts of the packet.
24 * @note Currently requires DPDK
27 #if DPDK > 0 /* Cannot run replicate without DPDK */
28 #include <vlib/vlib.h>
29 #include <vnet/vnet.h>
30 #include <vnet/pg/pg.h>
31 #include <vnet/sr/sr.h>
32 #include <vnet/devices/dpdk/dpdk.h>
33 #include <vnet/dpdk_replication.h>
34 #include <vnet/ip/ip.h>
36 #include <vppinfra/hash.h>
37 #include <vppinfra/error.h>
38 #include <vppinfra/elog.h>
41 * @brief sr_replicate state.
/* Members of the replicator state: pointers to the vlib and vnet main
   structures. NOTE(review): the typedef/struct opener is not visible in
   this view. */
47 vlib_main_t *vlib_main;
48 vnet_main_t *vnet_main;
49 } sr_replicate_main_t;
/* File-scope instance of the state above; sr_replicate_init () takes its
   address and stores the result of vnet_get_main () in vnet_main. */
51 sr_replicate_main_t sr_replicate_main;
54 * @brief Information to display in packet trace.
/* IPv6 source and destination addresses captured for a traced packet; the
   node function copies them from the freshly built header.
   NOTE(review): the node function and the formatter also use members named
   tunnel_index, length, next_index and sr; their declarations are not
   visible in this view. */
59 ip6_address_t src, dst;
64 } sr_replicate_trace_t;
67 * @brief packet trace format function.
69 * @param *s u8 used for string output
70 * @param *args va_list structured input to va_arg to output @ref sr_replicate_trace_t
71 * @return *s u8 - formatted trace output
74 format_sr_replicate_trace (u8 * s, va_list * args)
/* Trace formatter. Pulls three arguments off the va_list in order: the
   vlib main pointer and the node pointer (both marked unused) and then the
   sr_replicate_trace_t record to print. */
76 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
77 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
78 sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *);
79 ip6_main_t *im = &ip6_main;
80 ip6_sr_main_t *sm = &sr_main;
/* The trace stores only a tunnel index; the tunnel is looked up again at
   display time.
   NOTE(review): if the tunnel pool can change between capture and display
   this index may be stale -- confirm. */
81 ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
82 ip6_fib_t *rx_fib, *tx_fib;
/* Resolve the tunnel's rx and tx FIB indices to FIB objects so that their
   table ids can be printed below.
   NOTE(review): both results are dereferenced without a NULL check --
   presumably the lookup cannot fail for a FIB index; verify. */
84 rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index,
85 IP6_ROUTE_FLAG_FIB_INDEX);
87 tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index,
88 IP6_ROUTE_FLAG_FIB_INDEX);
/* Argument list of the formatting call: next-node name, src/dst addresses,
   length, rx/tx FIB table ids and the saved SR header (printed with
   print_hmac = 0).
   NOTE(review): the callee/return line and the argument matching the
   leading "%s" are not visible in this view. */
91 (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n"
92 " rx-fib-id %d tx-fib-id %d\n%U",
94 format_ip6_address, &t->src,
95 format_ip6_address, &t->dst, t->length,
96 rx_fib->table_id, tx_fib->table_id,
97 format_ip6_sr_header, t->sr, 0 /* print_hmac */ );
/* X-macro list of this node's counters as (symbol, description) pairs. It
   is expanded twice below with different definitions of _(): once to
   generate the SR_REPLICATE_ERROR_* enumerators and once to generate the
   matching description strings, so both stay in the same order. */
102 #define foreach_sr_replicate_error \
103 _(REPLICATED, "sr packets replicated") \
104 _(NO_BUFFERS, "error allocating buffers for replicas") \
105 _(NO_REPLICAS, "no replicas were needed") \
106 _(NO_BUFFER_DROPS, "sr no buffer drops")
/* Expansion 1: enumerators SR_REPLICATE_ERROR_<sym>, followed by
   SR_REPLICATE_N_ERROR as the last enumerator. */
110 #define _(sym,str) SR_REPLICATE_ERROR_##sym,
111 foreach_sr_replicate_error
113 SR_REPLICATE_N_ERROR,
114 } sr_replicate_error_t;
/* Expansion 2: description strings, indexed by the enumerators above. This
   table is handed to the node registration as .error_strings. */
116 static char *sr_replicate_error_strings[] = {
117 #define _(sym,string) string,
118 foreach_sr_replicate_error
123 * @brief Defines next-nodes for packet processing.
/* Next-node index; the node registration maps it to the "ip6-lookup" node.
   NOTE(review): the registration also uses SR_REPLICATE_N_NEXT, whose
   enumerator line is not visible in this view. */
128 SR_REPLICATE_NEXT_IP6_LOOKUP,
130 } sr_replicate_next_t;
133 * @brief Single loop packet replicator.
136 * @param vm vlib_main_t
137 * @return frame->n_vectors uword
140 sr_replicate_node_fn (vlib_main_t * vm,
141 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Node dispatch function. For each input buffer it looks up an SR policy
   and, for every tunnel index in that policy, builds a new header mbuf
   (copy of the packet's IPv6 header followed by the tunnel rewrite) chained
   in front of an mbuf holding the rest of the packet, then enqueues the
   header buffer to the next node. Returns frame->n_vectors.
   NOTE(review): several statements of this function (declarations, braces,
   some assignment targets and jumps) are not visible in this view; the
   comments below describe only what the visible lines do. */
143 u32 n_left_from, *from, *to_next;
144 sr_replicate_next_t next_index;
145 int pkts_replicated = 0;
146 ip6_sr_main_t *sm = &sr_main;
147 int no_buffer_drops = 0;
148 vlib_buffer_free_list_t *fl;
/* socket_id selects which entry of bm->pktmbuf_pools is used for every
   mbuf allocation and clone below. */
149 unsigned socket_id = rte_socket_id ();
150 vlib_buffer_main_t *bm = vm->buffer_main;
152 from = vlib_frame_vector_args (frame);
153 n_left_from = frame->n_vectors;
154 next_index = node->cached_next_index;
/* Default free list; used further down to initialise each newly allocated
   header buffer. */
156 fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
158 while (n_left_from > 0)
162 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
164 while (n_left_from > 0 && n_left_to_next > 0)
167 vlib_buffer_t *b0, *orig_b0;
168 struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0;
169 struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0;
170 ip6_sr_policy_t *pol0 = 0;
171 ip6_sr_tunnel_t *t0 = 0;
172 ip6_sr_header_t *hdr_sr0 = 0;
173 ip6_header_t *ip0 = 0, *hdr_ip0 = 0;
174 int num_replicas = 0;
179 b0 = vlib_get_buffer (vm, bi0);
/* The policy index is read from the buffer's ip.save_protocol field --
   presumably stored there by an upstream node; verify against the caller. */
182 pol0 = pool_elt_at_index (sm->policies,
183 vnet_buffer (b0)->ip.save_protocol);
/* Keep a pointer to the packet's IPv6 header; it is copied into every
   replica's header buffer later. */
185 ip0 = vlib_buffer_get_current (b0);
186 /* Skip forward to the punch-in point */
187 vlib_buffer_advance (b0, sizeof (*ip0));
189 orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
/* Bring the mbuf's length fields in line with the vlib view of the packet:
   delta0 is the vlib chain length minus the mbuf's pkt_len, and it is added
   to both data_len and pkt_len.
   NOTE(review): the assignment of orig_b0 is not visible in this view. */
191 i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0)
192 - (i16) orig_mb0->pkt_len;
194 u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0);
195 u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0);
197 orig_mb0->data_len = new_data_len0;
198 orig_mb0->pkt_len = new_pkt_len0;
/* NOTE(review): the left-hand side of the next expression is not visible
   in this view -- presumably the mbuf data offset; confirm. */
200 (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
203 Before entering loop determine if we can allocate:
204 - all the new HEADER RTE_MBUFs and assign them to a vector
207 if successful, then iterate over vectors of resources
/* One replica is produced per tunnel index in the policy. */
210 num_replicas = vec_len (pol0->tunnel_indices);
212 if (PREDICT_FALSE (num_replicas == 0))
214 b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS];
218 vec_reset_length (hdr_vec);
219 vec_reset_length (rte_mbuf_vec);
/* Pass 1: acquire all resources. Each replica needs one new header mbuf
   plus one mbuf for the rest of the packet (a clone of the original for
   all but the last tunnel). */
221 for (i = 0; i < num_replicas; i++)
223 hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
225 if (i < (num_replicas - 1))
226 /* Not the last tunnel to process */
227 clone0 = rte_pktmbuf_clone
228 (orig_mb0, bm->pktmbuf_pools[socket_id]);
230 /* Last tunnel to process, use original MB */
/* Allocation failure: flag the buffer with NO_BUFFERS and release what was
   collected in the two vectors so far.
   NOTE(review): the hdr_mb0 / clone0 obtained in THIS iteration have not
   been added to the vectors yet, so the visible loops do not free them --
   confirm they are released on a line not visible here, otherwise one of
   them leaks when only the other allocation failed. */
234 if (PREDICT_FALSE (!clone0 || !hdr_mb0))
236 b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS];
238 vec_foreach_index (i, rte_mbuf_vec)
240 rte_pktmbuf_free (rte_mbuf_vec[i]);
242 vec_free (rte_mbuf_vec);
244 vec_foreach_index (i, hdr_vec)
246 rte_pktmbuf_free (hdr_vec[i]);
253 vec_add1 (hdr_vec, hdr_mb0);
254 vec_add1 (rte_mbuf_vec, clone0);
/* Pass 2: all resources are available; build and enqueue one packet per
   tunnel. */
258 for (i = 0; i < num_replicas; i++)
260 vlib_buffer_t *hdr_b0;
262 t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
265 hdr_mb0 = hdr_vec[i];
266 clone0 = rte_mbuf_vec[i];
/* Header segment holds the IPv6 header plus this tunnel's rewrite. */
268 hdr_mb0->data_len = sizeof (*ip0) + vec_len (t0->rewrite);
269 hdr_mb0->pkt_len = hdr_mb0->data_len +
270 vlib_buffer_length_in_chain (vm, orig_b0);
272 hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0);
274 vlib_buffer_init_for_free_list (hdr_b0, fl);
/* Copy the packet's IPv6 header, then append the tunnel rewrite directly
   after it. */
276 memcpy (hdr_b0->data, ip0, sizeof (*ip0));
277 memcpy (hdr_b0->data + sizeof (*ip0), t0->rewrite,
278 vec_len (t0->rewrite));
280 hdr_b0->current_data = 0;
281 hdr_b0->current_length = sizeof (*ip0) + vec_len (t0->rewrite);
282 hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
285 hdr_b0->total_length_not_including_first_buffer =
286 hdr_mb0->pkt_len - hdr_b0->current_length;
/* NOTE(review): payload_length is set from hdr_mb0->data_len, which at this
   point equals sizeof (IPv6 header) + rewrite length and does not include
   the chained payload -- confirm this is the intended value. */
288 hdr_ip0 = (ip6_header_t *) hdr_b0->data;
289 hdr_ip0->payload_length =
290 clib_host_to_net_u16 (hdr_mb0->data_len);
/* The SR header starts right after the IPv6 header. It inherits the
   original next-protocol value, and the IPv6 header's protocol field is
   set to 43 (IANA number of the IPv6 Routing extension header). */
291 hdr_sr0 = (ip6_sr_header_t *) (hdr_ip0 + 1);
292 hdr_sr0->protocol = hdr_ip0->protocol;
293 hdr_ip0->protocol = 43;
295 /* Rewrite the ip6 dst address */
296 hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
297 hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
299 sr_fix_hmac (sm, hdr_ip0, hdr_sr0);
301 /* prepend new header to invariant piece */
/* Link the two segments at both levels: the mbuf chain (next) and the vlib
   buffer chain (next_buffer). */
302 hdr_mb0->next = clone0;
303 hdr_b0->next_buffer =
304 vlib_get_buffer_index (vm,
305 vlib_buffer_from_rte_mbuf (clone0));
307 /* update header's fields */
/* NOTE(review): the left-hand side of the next expression is not visible
   in this view -- presumably hdr_mb0->pkt_len; confirm. */
309 (uint16_t) (hdr_mb0->data_len + clone0->pkt_len);
310 hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1);
312 /* copy metadata from source packet */
313 hdr_mb0->port = clone0->port;
314 hdr_mb0->vlan_tci = clone0->vlan_tci;
315 hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer;
316 hdr_mb0->tx_offload = clone0->tx_offload;
317 hdr_mb0->hash = clone0->hash;
319 hdr_mb0->ol_flags = clone0->ol_flags;
321 __rte_mbuf_sanity_check (hdr_mb0, 1);
/* Enqueue the header buffer (head of the new chain) to the next node. */
323 hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0);
325 to_next[0] = hdr_bi0;
/* Replication can emit more buffers than were consumed, so the current
   next frame may fill up mid-packet; hand it off and fetch a fresh one. */
329 if (n_left_to_next == 0)
331 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
332 vlib_get_next_frame (vm, node, next_index,
333 to_next, n_left_to_next);
/* Tracing: record the tunnel index, the src/dst addresses and payload
   length of the header built last, the next index and a copy of the SR
   header.
   NOTE(review): the final memcpy copies sizeof (tr->sr) bytes starting at
   hdr_sr0 regardless of the actual SR header length -- confirm the source
   buffer is always at least that large. */
343 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
345 sr_replicate_trace_t *tr = vlib_add_trace (vm, node,
347 tr->tunnel_index = t0 - sm->tunnels;
351 memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8,
352 sizeof (tr->src.as_u8));
353 memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8,
354 sizeof (tr->dst.as_u8));
355 if (hdr_ip0->payload_length)
356 tr->length = clib_net_to_host_u16
357 (hdr_ip0->payload_length);
359 tr->next_index = next_index;
361 memcpy (tr->sr, hdr_sr0, sizeof (tr->sr));
366 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Bump the node's REPLICATED and NO_BUFFER_DROPS counters before
   returning. NOTE(review): the increment amounts are on lines not visible
   in this view. */
369 vlib_node_increment_counter (vm, sr_replicate_node.index,
370 SR_REPLICATE_ERROR_REPLICATED,
373 vlib_node_increment_counter (vm, sr_replicate_node.index,
374 SR_REPLICATE_ERROR_NO_BUFFER_DROPS,
377 return frame->n_vectors;
/* Registers the "sr-replicate" internal graph node: sr_replicate_node_fn is
   its dispatch function, frames carry u32 elements, and traces are printed
   with format_sr_replicate_trace. */
381 VLIB_REGISTER_NODE (sr_replicate_node) = {
382 .function = sr_replicate_node_fn,
383 .name = "sr-replicate",
384 .vector_size = sizeof (u32),
385 .format_trace = format_sr_replicate_trace,
386 .type = VLIB_NODE_TYPE_INTERNAL,
/* Counter descriptions come from the string table generated by
   foreach_sr_replicate_error. */
388 .n_errors = ARRAY_LEN(sr_replicate_error_strings),
389 .error_strings = sr_replicate_error_strings,
391 .n_next_nodes = SR_REPLICATE_N_NEXT,
/* Next-node table entry: the IP6_LOOKUP next index maps to "ip6-lookup". */
394 [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup",
/* Multiarch macro invoked with the node and its dispatch function. */
399 VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn)
/* Init function for the replicator: takes the address of the file-scope
   sr_replicate_main and stores the vnet main pointer in it.
   NOTE(review): the statement storing vm and the return statement are not
   visible in this view. */
400 clib_error_t *sr_replicate_init (vlib_main_t * vm)
402 sr_replicate_main_t *msm = &sr_replicate_main;
405 msm->vnet_main = vnet_get_main ();
/* Register sr_replicate_init as a vlib init function. */
410 VLIB_INIT_FUNCTION (sr_replicate_init);
415 * fd.io coding-style-patch-verification: ON
418 * eval: (c-set-style "gnu")