/*
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include <vnet/ip/ip.h>
17 #include <cnat/cnat_snat_policy.h>
18 #include <cnat/cnat_translation.h>
20 cnat_snat_policy_main_t cnat_snat_policy_main;
23 unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args)
25 u8 *a = va_arg (*args, u8 *);
26 if (unformat (input, "include-v4"))
27 *a = CNAT_SNAT_IF_MAP_INCLUDE_V4;
28 else if (unformat (input, "include-v6"))
29 *a = CNAT_SNAT_IF_MAP_INCLUDE_V6;
30 else if (unformat (input, "k8s"))
31 *a = CNAT_SNAT_IF_MAP_INCLUDE_POD;
32 else if (unformat (input, "host"))
33 *a = CNAT_SNAT_IF_MAP_INCLUDE_HOST;
40 format_cnat_snat_interface_map_type (u8 *s, va_list *args)
42 cnat_snat_interface_map_type_t mtype = va_arg (*args, int);
45 case CNAT_SNAT_IF_MAP_INCLUDE_V4:
46 s = format (s, "Included v4");
48 case CNAT_SNAT_IF_MAP_INCLUDE_V6:
49 s = format (s, "Included v6");
51 case CNAT_SNAT_IF_MAP_INCLUDE_POD:
52 s = format (s, "k8s pod");
54 case CNAT_SNAT_IF_MAP_INCLUDE_HOST:
55 s = format (s, "k8s host");
58 s = format (s, "(unknown)");
65 format_cnat_snat_prefix (u8 *s, va_list *args)
67 clib_bihash_kv_24_8_t *kv = va_arg (*args, clib_bihash_kv_24_8_t *);
68 CLIB_UNUSED (int verbose) = va_arg (*args, int);
69 u32 af = kv->key[2] >> 32;
70 u32 len = kv->key[2] & 0xffffffff;
72 s = format (s, "%U/%d", format_ip4_address, &kv->key[0], len);
74 s = format (s, "%U/%d", format_ip6_address, &kv->key[0], len);
79 cnat_compute_prefix_lengths_in_search_order (
80 cnat_snat_exclude_pfx_table_t *table, ip_address_family_t af)
83 vec_reset_length (table->meta[af].prefix_lengths_in_search_order);
84 /* Note: bitmap reversed so this is in fact a longest prefix match */
85 clib_bitmap_foreach (i, table->meta[af].non_empty_dst_address_length_bitmap)
87 int dst_address_length = 128 - i;
88 vec_add1 (table->meta[af].prefix_lengths_in_search_order,
94 cnat_snat_policy_add_del_if (u32 sw_if_index, u8 is_add,
95 cnat_snat_interface_map_type_t table)
97 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
99 if (table >= ARRAY_LEN (cpm->interface_maps))
100 return VNET_API_ERROR_INVALID_VALUE;
102 clib_bitmap_t **map = &cpm->interface_maps[table];
104 *map = clib_bitmap_set (*map, sw_if_index, is_add);
108 static clib_error_t *
109 cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm,
110 unformat_input_t *input,
111 vlib_cli_command_t *cmd)
113 vnet_main_t *vnm = vnet_get_main ();
115 u32 sw_if_index = ~0;
119 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
121 if (unformat (input, "del"))
123 else if (unformat (input, "table %U",
124 unformat_cnat_snat_interface_map_type, &table))
126 else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
130 return clib_error_return (0, "unknown input '%U'",
131 format_unformat_error, input);
134 if (sw_if_index == ~0)
135 return clib_error_return (0, "Interface not specified");
137 rv = cnat_snat_policy_add_del_if (sw_if_index, is_add, table);
140 return clib_error_return (0, "Error %d", rv);
145 VLIB_CLI_COMMAND (cnat_snat_policy_add_del_if_command, static) = {
146 .path = "set cnat snat-policy if",
147 .short_help = "set cnat snat-policy if [del]"
148 "[table [include-v4 include-v6 k8s]] [interface]",
149 .function = cnat_snat_policy_add_del_if_command_fn,
153 cnat_snat_policy_add_pfx (ip_prefix_t *pfx)
155 /* All packets destined to this prefix won't be source-NAT-ed */
156 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
157 clib_bihash_kv_24_8_t kv;
159 u64 af = ip_prefix_version (pfx);
162 mask = &table->ip_masks[pfx->len];
165 kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
170 kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
171 kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
173 kv.key[2] = ((u64) af << 32) | pfx->len;
174 clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 1 /* is_add */);
176 table->meta[af].dst_address_length_refcounts[pfx->len]++;
177 table->meta[af].non_empty_dst_address_length_bitmap = clib_bitmap_set (
178 table->meta[af].non_empty_dst_address_length_bitmap, 128 - pfx->len, 1);
179 cnat_compute_prefix_lengths_in_search_order (table, af);
184 cnat_snat_policy_del_pfx (ip_prefix_t *pfx)
186 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
187 clib_bihash_kv_24_8_t kv, val;
189 u64 af = ip_prefix_version (pfx);
192 mask = &table->ip_masks[pfx->len];
195 kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
200 kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
201 kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
203 kv.key[2] = ((u64) af << 32) | pfx->len;
205 if (clib_bihash_search_24_8 (&table->ip_hash, &kv, &val))
209 clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 0 /* is_add */);
210 /* refcount accounting */
211 ASSERT (table->meta[af].dst_address_length_refcounts[pfx->len] > 0);
212 if (--table->meta[af].dst_address_length_refcounts[pfx->len] == 0)
214 table->meta[af].non_empty_dst_address_length_bitmap =
215 clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap,
217 cnat_compute_prefix_lengths_in_search_order (table, af);
223 cnat_search_snat_prefix (ip46_address_t *addr, ip_address_family_t af)
225 /* Returns 0 if addr matches any of the listed prefixes */
226 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
227 clib_bihash_kv_24_8_t kv, val;
229 n_p = vec_len (table->meta[af].prefix_lengths_in_search_order);
232 kv.key[0] = addr->ip4.as_u32;
237 kv.key[0] = addr->as_u64[0];
238 kv.key[1] = addr->as_u64[1];
242 * start search from a mask length same length or shorter.
243 * we don't want matches longer than the mask passed
248 int dst_address_length =
249 table->meta[af].prefix_lengths_in_search_order[i];
250 ip6_address_t *mask = &table->ip_masks[dst_address_length];
252 ASSERT (dst_address_length >= 0 && dst_address_length <= 128);
253 /* As lengths are decreasing, masks are increasingly specific. */
254 kv.key[0] &= mask->as_u64[0];
255 kv.key[1] &= mask->as_u64[1];
256 kv.key[2] = ((u64) af << 32) | dst_address_length;
257 rv = clib_bihash_search_inline_2_24_8 (&table->ip_hash, &kv, &val);
264 static_always_inline int
265 cnat_snat_policy_interface_enabled (u32 sw_if_index, ip_address_family_t af)
267 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
268 return clib_bitmap_get (cpm->interface_maps[af], sw_if_index);
272 cnat_snat_policy_none (vlib_buffer_t *b, cnat_session_t *session)
274 /* srcNAT everything by default */
279 cnat_snat_policy_if_pfx (vlib_buffer_t *b, cnat_session_t *session)
281 ip46_address_t *dst_addr = &session->key.cs_ip[VLIB_TX];
282 u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
283 ip_address_family_t af = session->key.cs_af;
285 /* source nat for outgoing connections */
286 if (cnat_snat_policy_interface_enabled (in_if, af))
287 if (cnat_search_snat_prefix (dst_addr, af))
288 /* Destination is not in the prefixes that don't require snat */
294 cnat_snat_policy_k8s (vlib_buffer_t *b, cnat_session_t *session)
296 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
297 ip_address_family_t af = session->key.cs_af;
299 ip46_address_t *src_addr = &session->key.cs_ip[VLIB_RX];
300 ip46_address_t *dst_addr = &session->key.cs_ip[VLIB_TX];
301 u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
302 u32 out_if = vnet_buffer (b)->sw_if_index[VLIB_TX];
304 /* we should never snat traffic that we punt to the host, pass traffic as it
306 if (clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_HOST],
312 /* source nat for outgoing connections */
313 if (cnat_snat_policy_interface_enabled (in_if, af))
314 if (cnat_search_snat_prefix (dst_addr, af))
315 /* Destination is not in the prefixes that don't require snat */
318 /* source nat for translations that come from the outside:
319 src not not a pod interface, dst not a pod interface */
320 if (!clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_POD],
322 !clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_POD],
326 ip6_address_is_equal (&src_addr->ip6,
327 &ip_addr_v6 (&cpm->snat_ip6.ce_ip)))
330 ip4_address_is_equal (&src_addr->ip4,
331 &ip_addr_v4 (&cpm->snat_ip4.ce_ip)))
336 /* handle the case where a container is connecting to itself via a service */
337 if (ip46_address_is_equal (src_addr, dst_addr))
344 cnat_set_snat (ip4_address_t *ip4, ip6_address_t *ip6, u32 sw_if_index)
346 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
350 cnat_translation_unwatch_addr (INDEX_INVALID, CNAT_RESOLV_ADDR_SNAT);
352 ip_address_set (&cpm->snat_ip4.ce_ip, ip4, AF_IP4);
353 ip_address_set (&cpm->snat_ip6.ce_ip, ip6, AF_IP6);
354 cpm->snat_ip4.ce_sw_if_index = sw_if_index;
355 cpm->snat_ip6.ce_sw_if_index = sw_if_index;
357 cnat_resolve_ep (&cpm->snat_ip4);
358 cnat_resolve_ep (&cpm->snat_ip6);
359 cnat_translation_watch_addr (INDEX_INVALID, 0, &cpm->snat_ip4,
360 CNAT_RESOLV_ADDR_SNAT);
361 cnat_translation_watch_addr (INDEX_INVALID, 0, &cpm->snat_ip6,
362 CNAT_RESOLV_ADDR_SNAT);
365 static clib_error_t *
366 cnat_set_snat_cli (vlib_main_t *vm, unformat_input_t *input,
367 vlib_cli_command_t *cmd)
369 unformat_input_t _line_input, *line_input = &_line_input;
370 vnet_main_t *vnm = vnet_get_main ();
371 ip4_address_t ip4 = { { 0 } };
372 ip6_address_t ip6 = { { 0 } };
374 u32 sw_if_index = INDEX_INVALID;
378 /* Get a line of input. */
379 if (!unformat_user (input, unformat_line_input, line_input))
382 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
384 if (unformat_user (line_input, unformat_ip4_address, &ip4))
386 else if (unformat_user (line_input, unformat_ip6_address, &ip6))
388 else if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
393 e = clib_error_return (0, "unknown input '%U'",
394 format_unformat_error, input);
399 cnat_set_snat (&ip4, &ip6, sw_if_index);
402 unformat_free (line_input);
407 VLIB_CLI_COMMAND (cnat_set_snat_command, static) = {
408 .path = "set cnat snat-policy addr",
410 "set cnat snat-policy addr [<ip4-address>][<ip6-address>][sw_if_index]",
411 .function = cnat_set_snat_cli,
414 static clib_error_t *
415 cnat_snat_policy_add_del_pfx_command_fn (vlib_main_t *vm,
416 unformat_input_t *input,
417 vlib_cli_command_t *cmd)
423 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
425 if (unformat (input, "%U", unformat_ip_prefix, &pfx))
427 else if (unformat (input, "del"))
430 return (clib_error_return (0, "unknown input '%U'",
431 format_unformat_error, input));
435 rv = cnat_snat_policy_add_pfx (&pfx);
437 rv = cnat_snat_policy_del_pfx (&pfx);
440 return (clib_error_return (0, "error %d", rv, input));
445 VLIB_CLI_COMMAND (cnat_snat_policy_add_del_pfx_command, static) = {
446 .path = "set cnat snat-policy prefix",
447 .short_help = "set cnat snat-policy prefix [del] [prefix]",
448 .function = cnat_snat_policy_add_del_pfx_command_fn,
451 static clib_error_t *
452 cnat_show_snat (vlib_main_t *vm, unformat_input_t *input,
453 vlib_cli_command_t *cmd)
455 cnat_snat_exclude_pfx_table_t *excluded_pfx =
456 &cnat_snat_policy_main.excluded_pfx;
457 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
458 vnet_main_t *vnm = vnet_get_main ();
461 vlib_cli_output (vm, "Source NAT\n ip4: %U\n ip6: %U\n\n",
462 format_cnat_endpoint, &cpm->snat_ip4, format_cnat_endpoint,
464 vlib_cli_output (vm, "Excluded prefixes:\n %U\n", format_bihash_24_8,
465 &excluded_pfx->ip_hash, 1);
467 for (int i = 0; i < CNAT_N_SNAT_IF_MAP; i++)
469 vlib_cli_output (vm, "\n%U interfaces:\n",
470 format_cnat_snat_interface_map_type, i);
471 clib_bitmap_foreach (sw_if_index, cpm->interface_maps[i])
472 vlib_cli_output (vm, " %U\n", format_vnet_sw_if_index_name, vnm,
479 VLIB_CLI_COMMAND (cnat_show_snat_command, static) = {
480 .path = "show cnat snat-policy",
481 .short_help = "show cnat snat-policy",
482 .function = cnat_show_snat,
486 cnat_set_snat_policy (cnat_snat_policy_type_t policy)
488 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
491 case CNAT_SNAT_POLICY_NONE:
492 cpm->snat_policy = cnat_snat_policy_none;
494 case CNAT_SNAT_POLICY_IF_PFX:
495 cpm->snat_policy = cnat_snat_policy_if_pfx;
497 case CNAT_SNAT_POLICY_K8S:
498 cpm->snat_policy = cnat_snat_policy_k8s;
506 static clib_error_t *
507 cnat_snat_policy_set_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
508 vlib_cli_command_t *cmd)
510 cnat_snat_policy_type_t policy = CNAT_SNAT_POLICY_NONE;
511 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
513 if (unformat (input, "none"))
515 else if (unformat (input, "if-pfx"))
516 policy = CNAT_SNAT_POLICY_IF_PFX;
517 else if (unformat (input, "k8s"))
518 policy = CNAT_SNAT_POLICY_K8S;
520 return clib_error_return (0, "unknown input '%U'",
521 format_unformat_error, input);
524 cnat_set_snat_policy (policy);
528 VLIB_CLI_COMMAND (cnat_snat_policy_set_cmd, static) = {
529 .path = "set cnat snat-policy",
530 .short_help = "set cnat snat-policy [none][if-pfx][k8s]",
531 .function = cnat_snat_policy_set_cmd_fn,
535 cnat_if_addr_add_del_snat_cb (addr_resolution_t *ar, ip_address_t *address,
538 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
541 ep = AF_IP4 == ar->af ? &cpm->snat_ip4 : &cpm->snat_ip6;
543 if (!is_del && ep->ce_flags & CNAT_EP_FLAG_RESOLVED)
548 ep->ce_flags &= ~CNAT_EP_FLAG_RESOLVED;
549 /* Are there remaining addresses ? */
550 if (0 == cnat_resolve_addr (ar->sw_if_index, ar->af, address))
556 ip_address_copy (&ep->ce_ip, address);
557 ep->ce_flags |= CNAT_EP_FLAG_RESOLVED;
561 static clib_error_t *
562 cnat_snat_init (vlib_main_t *vm)
564 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
565 cnat_main_t *cm = &cnat_main;
566 cnat_snat_exclude_pfx_table_t *excluded_pfx = &cpm->excluded_pfx;
569 for (i = 0; i < ARRAY_LEN (excluded_pfx->ip_masks); i++)
576 for (j = 0; j < i0; j++)
577 excluded_pfx->ip_masks[i].as_u32[j] = ~0;
580 excluded_pfx->ip_masks[i].as_u32[i0] =
581 clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
583 clib_bihash_init_24_8 (&excluded_pfx->ip_hash, "snat prefixes",
584 cm->snat_hash_buckets, cm->snat_hash_memory);
585 clib_bihash_set_kvp_format_fn_24_8 (&excluded_pfx->ip_hash,
586 format_cnat_snat_prefix);
588 for (int i = 0; i < CNAT_N_SNAT_IF_MAP; i++)
589 clib_bitmap_validate (cpm->interface_maps[i], cm->snat_if_map_length);
591 cnat_translation_register_addr_add_cb (CNAT_RESOLV_ADDR_SNAT,
592 cnat_if_addr_add_del_snat_cb);
594 cpm->snat_policy = cnat_snat_policy_none;
599 VLIB_INIT_FUNCTION (cnat_snat_init);
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */