2 * Copyright (c) 2020 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/ip/ip.h>
17 #include <cnat/cnat_snat_policy.h>
18 #include <cnat/cnat_translation.h>
20 cnat_snat_policy_main_t cnat_snat_policy_main;
23 unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args)
25 u8 *a = va_arg (*args, u8 *);
26 if (unformat (input, "include-v4"))
27 *a = CNAT_SNAT_IF_MAP_INCLUDE_V4;
28 else if (unformat (input, "include-v6"))
29 *a = CNAT_SNAT_IF_MAP_INCLUDE_V6;
30 else if (unformat (input, "k8s"))
31 *a = CNAT_SNAT_IF_MAP_INCLUDE_POD;
38 format_cnat_snat_interface_map_type (u8 *s, va_list *args)
40 cnat_snat_interface_map_type_t mtype = va_arg (*args, int);
43 case CNAT_SNAT_IF_MAP_INCLUDE_V4:
44 s = format (s, "Included v4");
46 case CNAT_SNAT_IF_MAP_INCLUDE_V6:
47 s = format (s, "Included v6");
49 case CNAT_SNAT_IF_MAP_INCLUDE_POD:
50 s = format (s, "k8s pod");
53 s = format (s, "(unknown)");
60 format_cnat_snat_prefix (u8 *s, va_list *args)
62 clib_bihash_kv_24_8_t *kv = va_arg (*args, clib_bihash_kv_24_8_t *);
63 CLIB_UNUSED (int verbose) = va_arg (*args, int);
64 u32 af = kv->key[2] >> 32;
65 u32 len = kv->key[2] & 0xffffffff;
67 s = format (s, "%U/%d", format_ip4_address, &kv->key[0], len);
69 s = format (s, "%U/%d", format_ip6_address, &kv->key[0], len);
74 cnat_compute_prefix_lengths_in_search_order (
75 cnat_snat_exclude_pfx_table_t *table, ip_address_family_t af)
78 vec_reset_length (table->meta[af].prefix_lengths_in_search_order);
79 /* Note: bitmap reversed so this is in fact a longest prefix match */
80 clib_bitmap_foreach (i, table->meta[af].non_empty_dst_address_length_bitmap)
82 int dst_address_length = 128 - i;
83 vec_add1 (table->meta[af].prefix_lengths_in_search_order,
89 cnat_snat_policy_add_del_if (u32 sw_if_index, u8 is_add,
90 cnat_snat_interface_map_type_t table)
92 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
94 if (table >= ARRAY_LEN (cpm->interface_maps))
95 return VNET_API_ERROR_INVALID_VALUE;
97 clib_bitmap_t **map = &cpm->interface_maps[table];
99 *map = clib_bitmap_set (*map, sw_if_index, is_add);
103 static clib_error_t *
104 cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm,
105 unformat_input_t *input,
106 vlib_cli_command_t *cmd)
108 vnet_main_t *vnm = vnet_get_main ();
110 u32 sw_if_index = ~0;
114 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
116 if (unformat (input, "del"))
118 else if (unformat (input, "table %U",
119 unformat_cnat_snat_interface_map_type, &table))
121 else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
125 return clib_error_return (0, "unknown input '%U'",
126 format_unformat_error, input);
129 if (sw_if_index == ~0)
130 return clib_error_return (0, "Interface not specified");
132 rv = cnat_snat_policy_add_del_if (sw_if_index, is_add, table);
135 return clib_error_return (0, "Error %d", rv);
140 VLIB_CLI_COMMAND (cnat_snat_policy_add_del_if_command, static) = {
141 .path = "set cnat snat-policy if",
142 .short_help = "set cnat snat-policy if [del]"
143 "[table [include-v4 include-v6 k8s]] [interface]",
144 .function = cnat_snat_policy_add_del_if_command_fn,
148 cnat_snat_policy_add_pfx (ip_prefix_t *pfx)
150 /* All packets destined to this prefix won't be source-NAT-ed */
151 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
152 clib_bihash_kv_24_8_t kv;
154 u64 af = ip_prefix_version (pfx);
157 mask = &table->ip_masks[pfx->len];
160 kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
165 kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
166 kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
168 kv.key[2] = ((u64) af << 32) | pfx->len;
169 clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 1 /* is_add */);
171 table->meta[af].dst_address_length_refcounts[pfx->len]++;
172 table->meta[af].non_empty_dst_address_length_bitmap = clib_bitmap_set (
173 table->meta[af].non_empty_dst_address_length_bitmap, 128 - pfx->len, 1);
174 cnat_compute_prefix_lengths_in_search_order (table, af);
179 cnat_snat_policy_del_pfx (ip_prefix_t *pfx)
181 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
182 clib_bihash_kv_24_8_t kv, val;
184 u64 af = ip_prefix_version (pfx);
187 mask = &table->ip_masks[pfx->len];
190 kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
195 kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
196 kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
198 kv.key[2] = ((u64) af << 32) | pfx->len;
200 if (clib_bihash_search_24_8 (&table->ip_hash, &kv, &val))
204 clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 0 /* is_add */);
205 /* refcount accounting */
206 ASSERT (table->meta[af].dst_address_length_refcounts[pfx->len] > 0);
207 if (--table->meta[af].dst_address_length_refcounts[pfx->len] == 0)
209 table->meta[af].non_empty_dst_address_length_bitmap =
210 clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap,
212 cnat_compute_prefix_lengths_in_search_order (table, af);
218 cnat_search_snat_prefix (ip46_address_t *addr, ip_address_family_t af)
220 /* Returns 0 if addr matches any of the listed prefixes */
221 cnat_snat_exclude_pfx_table_t *table = &cnat_snat_policy_main.excluded_pfx;
222 clib_bihash_kv_24_8_t kv, val;
224 n_p = vec_len (table->meta[af].prefix_lengths_in_search_order);
227 kv.key[0] = addr->ip4.as_u32;
232 kv.key[0] = addr->as_u64[0];
233 kv.key[1] = addr->as_u64[1];
237 * start search from a mask length same length or shorter.
238 * we don't want matches longer than the mask passed
243 int dst_address_length =
244 table->meta[af].prefix_lengths_in_search_order[i];
245 ip6_address_t *mask = &table->ip_masks[dst_address_length];
247 ASSERT (dst_address_length >= 0 && dst_address_length <= 128);
248 /* As lengths are decreasing, masks are increasingly specific. */
249 kv.key[0] &= mask->as_u64[0];
250 kv.key[1] &= mask->as_u64[1];
251 kv.key[2] = ((u64) af << 32) | dst_address_length;
252 rv = clib_bihash_search_inline_2_24_8 (&table->ip_hash, &kv, &val);
259 static_always_inline int
260 cnat_snat_policy_interface_enabled (u32 sw_if_index, ip_address_family_t af)
262 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
263 return clib_bitmap_get (cpm->interface_maps[af], sw_if_index);
267 cnat_snat_policy_none (vlib_buffer_t *b, cnat_session_t *session)
269 /* srcNAT everything by default */
274 cnat_snat_policy_if_pfx (vlib_buffer_t *b, cnat_session_t *session)
276 ip46_address_t *dst_addr = &session->key.cs_ip[VLIB_TX];
277 u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
278 ip_address_family_t af = session->key.cs_af;
280 /* source nat for outgoing connections */
281 if (cnat_snat_policy_interface_enabled (in_if, af))
282 if (cnat_search_snat_prefix (dst_addr, af))
283 /* Destination is not in the prefixes that don't require snat */
289 cnat_snat_policy_k8s (vlib_buffer_t *b, cnat_session_t *session)
291 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
292 ip_address_family_t af = session->key.cs_af;
294 ip46_address_t *src_addr = &session->key.cs_ip[VLIB_RX];
295 ip46_address_t *dst_addr = &session->key.cs_ip[VLIB_TX];
296 u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
297 u32 out_if = vnet_buffer (b)->sw_if_index[VLIB_TX];
299 /* source nat for outgoing connections */
300 if (cnat_snat_policy_interface_enabled (in_if, af))
301 if (cnat_search_snat_prefix (dst_addr, af))
302 /* Destination is not in the prefixes that don't require snat */
305 /* source nat for translations that come from the outside:
306 src not not a pod interface, dst not a pod interface */
307 if (!clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_POD],
309 !clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_POD],
313 ip6_address_is_equal (&src_addr->ip6,
314 &ip_addr_v6 (&cpm->snat_ip6.ce_ip)))
317 ip4_address_is_equal (&src_addr->ip4,
318 &ip_addr_v4 (&cpm->snat_ip4.ce_ip)))
323 /* handle the case where a container is connecting to itself via a service */
324 if (ip46_address_is_equal (src_addr, dst_addr))
331 cnat_set_snat (ip4_address_t *ip4, ip6_address_t *ip6, u32 sw_if_index)
333 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
337 cnat_translation_unwatch_addr (INDEX_INVALID, CNAT_RESOLV_ADDR_SNAT);
339 ip_address_set (&cpm->snat_ip4.ce_ip, ip4, AF_IP4);
340 ip_address_set (&cpm->snat_ip6.ce_ip, ip6, AF_IP6);
341 cpm->snat_ip4.ce_sw_if_index = sw_if_index;
342 cpm->snat_ip6.ce_sw_if_index = sw_if_index;
344 cnat_resolve_ep (&cpm->snat_ip4);
345 cnat_resolve_ep (&cpm->snat_ip6);
346 cnat_translation_watch_addr (INDEX_INVALID, 0, &cpm->snat_ip4,
347 CNAT_RESOLV_ADDR_SNAT);
348 cnat_translation_watch_addr (INDEX_INVALID, 0, &cpm->snat_ip6,
349 CNAT_RESOLV_ADDR_SNAT);
352 static clib_error_t *
353 cnat_set_snat_cli (vlib_main_t *vm, unformat_input_t *input,
354 vlib_cli_command_t *cmd)
356 unformat_input_t _line_input, *line_input = &_line_input;
357 vnet_main_t *vnm = vnet_get_main ();
358 ip4_address_t ip4 = { { 0 } };
359 ip6_address_t ip6 = { { 0 } };
361 u32 sw_if_index = INDEX_INVALID;
365 /* Get a line of input. */
366 if (!unformat_user (input, unformat_line_input, line_input))
369 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
371 if (unformat_user (line_input, unformat_ip4_address, &ip4))
373 else if (unformat_user (line_input, unformat_ip6_address, &ip6))
375 else if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
380 e = clib_error_return (0, "unknown input '%U'",
381 format_unformat_error, input);
386 cnat_set_snat (&ip4, &ip6, sw_if_index);
389 unformat_free (line_input);
394 VLIB_CLI_COMMAND (cnat_set_snat_command, static) = {
395 .path = "set cnat snat-policy addr",
397 "set cnat snat-policy addr [<ip4-address>][<ip6-address>][sw_if_index]",
398 .function = cnat_set_snat_cli,
401 static clib_error_t *
402 cnat_snat_policy_add_del_pfx_command_fn (vlib_main_t *vm,
403 unformat_input_t *input,
404 vlib_cli_command_t *cmd)
410 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
412 if (unformat (input, "%U", unformat_ip_prefix, &pfx))
414 else if (unformat (input, "del"))
417 return (clib_error_return (0, "unknown input '%U'",
418 format_unformat_error, input));
422 rv = cnat_snat_policy_add_pfx (&pfx);
424 rv = cnat_snat_policy_del_pfx (&pfx);
427 return (clib_error_return (0, "error %d", rv, input));
432 VLIB_CLI_COMMAND (cnat_snat_policy_add_del_pfx_command, static) = {
433 .path = "set cnat snat-policy prefix",
434 .short_help = "set cnat snat-policy prefix [del] [prefix]",
435 .function = cnat_snat_policy_add_del_pfx_command_fn,
438 static clib_error_t *
439 cnat_show_snat (vlib_main_t *vm, unformat_input_t *input,
440 vlib_cli_command_t *cmd)
442 cnat_snat_exclude_pfx_table_t *excluded_pfx =
443 &cnat_snat_policy_main.excluded_pfx;
444 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
445 vnet_main_t *vnm = vnet_get_main ();
448 vlib_cli_output (vm, "Source NAT\n ip4: %U\n ip6: %U\n\n",
449 format_cnat_endpoint, &cpm->snat_ip4, format_cnat_endpoint,
451 vlib_cli_output (vm, "Excluded prefixes:\n %U\n", format_bihash_24_8,
452 &excluded_pfx->ip_hash, 1);
454 for (int i = 0; i < CNAT_N_SNAT_IF_MAP; i++)
456 vlib_cli_output (vm, "\n%U interfaces:\n",
457 format_cnat_snat_interface_map_type, i);
458 clib_bitmap_foreach (sw_if_index, cpm->interface_maps[i])
459 vlib_cli_output (vm, " %U\n", format_vnet_sw_if_index_name, vnm,
466 VLIB_CLI_COMMAND (cnat_show_snat_command, static) = {
467 .path = "show cnat snat-policy",
468 .short_help = "show cnat snat-policy",
469 .function = cnat_show_snat,
473 cnat_set_snat_policy (cnat_snat_policy_type_t policy)
475 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
478 case CNAT_SNAT_POLICY_NONE:
479 cpm->snat_policy = cnat_snat_policy_none;
481 case CNAT_SNAT_POLICY_IF_PFX:
482 cpm->snat_policy = cnat_snat_policy_if_pfx;
484 case CNAT_SNAT_POLICY_K8S:
485 cpm->snat_policy = cnat_snat_policy_k8s;
493 static clib_error_t *
494 cnat_snat_policy_set_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
495 vlib_cli_command_t *cmd)
497 cnat_snat_policy_type_t policy = CNAT_SNAT_POLICY_NONE;
498 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
500 if (unformat (input, "none"))
502 else if (unformat (input, "if-pfx"))
503 policy = CNAT_SNAT_POLICY_IF_PFX;
504 else if (unformat (input, "k8s"))
505 policy = CNAT_SNAT_POLICY_K8S;
507 return clib_error_return (0, "unknown input '%U'",
508 format_unformat_error, input);
511 cnat_set_snat_policy (policy);
515 VLIB_CLI_COMMAND (cnat_snat_policy_set_cmd, static) = {
516 .path = "set cnat snat-policy",
517 .short_help = "set cnat snat-policy [none][if-pfx][k8s]",
518 .function = cnat_snat_policy_set_cmd_fn,
522 cnat_if_addr_add_del_snat_cb (addr_resolution_t *ar, ip_address_t *address,
525 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
528 ep = AF_IP4 == ar->af ? &cpm->snat_ip4 : &cpm->snat_ip6;
530 if (!is_del && ep->ce_flags & CNAT_EP_FLAG_RESOLVED)
535 ep->ce_flags &= ~CNAT_EP_FLAG_RESOLVED;
536 /* Are there remaining addresses ? */
537 if (0 == cnat_resolve_addr (ar->sw_if_index, ar->af, address))
543 ip_address_copy (&ep->ce_ip, address);
544 ep->ce_flags |= CNAT_EP_FLAG_RESOLVED;
548 static clib_error_t *
549 cnat_snat_init (vlib_main_t *vm)
551 cnat_snat_policy_main_t *cpm = &cnat_snat_policy_main;
552 cnat_main_t *cm = &cnat_main;
553 cnat_snat_exclude_pfx_table_t *excluded_pfx = &cpm->excluded_pfx;
556 for (i = 0; i < ARRAY_LEN (excluded_pfx->ip_masks); i++)
563 for (j = 0; j < i0; j++)
564 excluded_pfx->ip_masks[i].as_u32[j] = ~0;
567 excluded_pfx->ip_masks[i].as_u32[i0] =
568 clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
570 clib_bihash_init_24_8 (&excluded_pfx->ip_hash, "snat prefixes",
571 cm->snat_hash_buckets, cm->snat_hash_memory);
572 clib_bihash_set_kvp_format_fn_24_8 (&excluded_pfx->ip_hash,
573 format_cnat_snat_prefix);
575 for (int i = 0; i < CNAT_N_SNAT_IF_MAP; i++)
576 clib_bitmap_validate (cpm->interface_maps[i], cm->snat_if_map_length);
578 cnat_translation_register_addr_add_cb (CNAT_RESOLV_ADDR_SNAT,
579 cnat_if_addr_add_del_snat_cb);
581 cpm->snat_policy = cnat_snat_policy_none;
586 VLIB_INIT_FUNCTION (cnat_snat_init);
589 * fd.io coding-style-patch-verification: ON
592 * eval: (c-set-style "gnu")