X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fip%2Fip6_forward.c;h=0ed20eea4ee7c1e26d036b01af81879f7d71cb1e;hb=f2984bbb0;hp=88b21d59019aa9eb0a86f0744c6f45129e61f813;hpb=f3a3bad8006afb0b379761dfe4d5d25de07b32cc;p=vpp.git diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 88b21d59019..0ed20eea4ee 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -39,7 +39,8 @@ #include #include -#include +#include +#include #include /* for ethernet_header_t */ #include /* for srp_hw_interface_class */ #include @@ -48,30 +49,63 @@ #include #include #include +#include +#ifndef CLIB_MARCH_VARIANT #include +#endif #include +#include +#include /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */ #define OI_DECAP 0x80000000 static void -ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, - ip6_main_t * im, u32 fib_index, - ip_interface_address_t * a) +ip6_add_interface_prefix_routes (ip6_main_t * im, + u32 sw_if_index, + u32 fib_index, + ip6_address_t * address, u32 address_length) { ip_lookup_main_t *lm = &im->lookup_main; - ip6_address_t *address = ip_interface_address_get_address (lm, a); - fib_prefix_t pfx = { - .fp_len = a->address_length, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = *address, + ip_interface_prefix_t *if_prefix; + + /* *INDENT-OFF* */ + ip_interface_prefix_key_t key = { + .prefix = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = { + .as_u64 = { + address->as_u64[0] & im->fib_masks[address_length].as_u64[0], + address->as_u64[1] & im->fib_masks[address_length].as_u64[1], + }, + }, + }, + .sw_if_index = sw_if_index, }; + /* *INDENT-ON* */ + + /* If prefix already set on interface, just increment ref count & return */ + if_prefix = ip_get_interface_prefix (lm, &key); + if (if_prefix) + { + if_prefix->ref_count += 1; + return; + } + + /* New prefix - allocate a pool entry, initialize it, add to the hash */ + pool_get (lm->if_prefix_pool, if_prefix); + if_prefix->ref_count = 1; + clib_memcpy (&if_prefix->key, &key, sizeof (key)); + mhash_set (&lm->prefix_to_if_prefix_index, &key, + if_prefix - lm->if_prefix_pool, 0 /* old value */ ); - if (a->address_length < 128) + /* length < 128 - add glean */ + if (address_length < 128) { - fib_table_entry_update_one_path (fib_index, - &pfx, + /* set the glean route for the prefix */ + fib_table_entry_update_one_path (fib_index, &key.prefix, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), @@ -80,9 +114,27 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, NULL, sw_if_index, /* invalid FIB index */ ~0, 1, - /* no label stack */ + /* no out-label stack */ NULL, FIB_ROUTE_PATH_FLAG_NONE); } +} + +static void +ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, + ip6_main_t * im, u32 fib_index, + ip_interface_address_t * a) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip6_address_t *address = ip_interface_address_get_address (lm, a); + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + /* set special routes for the prefix if needed */ + ip6_add_interface_prefix_routes (im, sw_if_index, fib_index, + address, a->address_length); pfx.fp_len = 128; if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) @@ -117,30 +169,79 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, } static void -ip6_del_interface_routes (ip6_main_t * im, +ip6_del_interface_prefix_routes (ip6_main_t * im, + u32 sw_if_index, + u32 fib_index, + ip6_address_t * address, u32 address_length) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip_interface_prefix_t *if_prefix; + + /* *INDENT-OFF* */ + ip_interface_prefix_key_t key = { + .prefix = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = { + .as_u64 = { + address->as_u64[0] & im->fib_masks[address_length].as_u64[0], + address->as_u64[1] & im->fib_masks[address_length].as_u64[1], + }, + }, + }, + .sw_if_index = sw_if_index, + }; + /* *INDENT-ON* */ + + if_prefix = ip_get_interface_prefix (lm, &key); + if (!if_prefix) + { + clib_warning ("Prefix not found while deleting %U", + format_ip4_address_and_length, address, address_length); + return; + } + + /* If not deleting last intf addr in prefix, decrement ref count & return */ + if_prefix->ref_count -= 1; + if (if_prefix->ref_count > 0) + return; + + /* length <= 128, delete glean route */ + if (address_length <= 128) + { + /* remove glean route for prefix */ + fib_table_entry_delete (fib_index, &key.prefix, FIB_SOURCE_INTERFACE); + } + + mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */ ); + pool_put (lm->if_prefix_pool, if_prefix); +} + +static void +ip6_del_interface_routes (u32 sw_if_index, ip6_main_t * im, u32 fib_index, ip6_address_t * address, u32 address_length) { fib_prefix_t pfx = { - .fp_len = address_length, + .fp_len = 128, .fp_proto = FIB_PROTOCOL_IP6, .fp_addr.ip6 = *address, }; - if (pfx.fp_len < 128) - { - fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); + /* delete special routes for the prefix if needed */ + ip6_del_interface_prefix_routes (im, sw_if_index, fib_index, + address, address_length); - } - - pfx.fp_len = 128; fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); } +#ifndef CLIB_MARCH_VARIANT void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) { ip6_main_t *im = &ip6_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); @@ -166,6 +267,11 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) vnet_feature_enable_disable ("ip6-multicast", "ip6-not-enabled", sw_if_index, !is_enable, 0, 0); + + if (is_enable) + hi->l3_if_count++; + else if (hi->l3_if_count) + hi->l3_if_count--; } /* get first interface address */ @@ -197,9 +303,10 @@ ip6_add_del_interface_address (vlib_main_t * vm, vnet_main_t *vnm = vnet_get_main (); ip6_main_t *im = &ip6_main; ip_lookup_main_t *lm = &im->lookup_main; - clib_error_t *error; + clib_error_t *error = NULL; u32 if_address_index; ip6_address_fib_t ip6_af, *addr_fib = 0; + const ip6_address_t *ll_addr; /* local0 interface doesn't support IP addressing */ if (sw_if_index == 0) @@ -208,6 +315,45 @@ ip6_add_del_interface_address (vlib_main_t * vm, clib_error_create ("local0 interface doesn't support IP addressing"); } + if (ip6_address_is_link_local_unicast (address)) + { + if (address_length != 128) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH; + return + clib_error_create + ("prefix length of link-local address must be 128"); + } + if (!is_del) + { + int rv; + + rv = ip6_link_set_local_address (sw_if_index, address); + + if (rv) + { + vnm->api_errno = rv; + return clib_error_create ("address not assignable"); + } + } + else + { + ll_addr = ip6_get_link_local_address (sw_if_index); + if (ip6_address_is_equal (ll_addr, address)) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_DELETABLE; + return clib_error_create ("address not deletable"); + } + else + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; + return clib_error_create ("address not found"); + } + } + + return (NULL); + } + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); @@ -215,42 +361,169 @@ ip6_add_del_interface_address (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip6_af); - { - uword elts_before = pool_elts (lm->if_address_pool); + /* *INDENT-OFF* */ + if (!is_del) + { + /* When adding an address check that it does not conflict + with an existing address on any interface in this table. */ + ip_interface_address_t *ia; + vnet_sw_interface_t *sif; + + pool_foreach (sif, vnm->interface_main.sw_interfaces) + { + if (im->fib_index_by_sw_if_index[sw_if_index] == + im->fib_index_by_sw_if_index[sif->sw_if_index]) + { + foreach_ip_interface_address + (&im->lookup_main, ia, sif->sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip6_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + + if (ip6_destination_matches_route + (im, address, x, ia->address_length) || + ip6_destination_matches_route (im, + x, + address, + address_length)) + { + /* an intf may have >1 addr from the same prefix */ + if ((sw_if_index == sif->sw_if_index) && + (ia->address_length == address_length) && + !ip6_address_is_equal (x, address)) + continue; + + if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) + /* if the address we're comparing against is stale + * then the CP has not added this one back yet, maybe + * it never will, so we have to assume it won't and + * ignore it. if it does add it back, then it will fail + * because this one is now present */ + continue; + + /* error if the length or intf was different */ + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + error = clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip6_address_and_length, address, + address_length, + format_ip6_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + goto done; + } + })); + } + } + } + /* *INDENT-ON* */ - error = ip_interface_address_add_del - (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index); - if (error) - goto done; + if_address_index = ip_interface_address_find (lm, addr_fib, address_length); - /* Pool did not grow: add duplicate address. */ - if (elts_before == pool_elts (lm->if_address_pool)) - goto done; - } + if (is_del) + { + if (~0 == if_address_index) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; + error = clib_error_create ("%U not found for interface %U", + lm->format_address_and_length, + addr_fib, address_length, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + } + + error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib, + address_length, sw_if_index); + if (error) + goto done; + } + else + { + if (~0 != if_address_index) + { + ip_interface_address_t *ia; + + ia = pool_elt_at_index (lm->if_address_pool, if_address_index); + + if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) + { + if (ia->sw_if_index == sw_if_index) + { + /* re-adding an address during the replace action. + * consdier this the update. clear the flag and + * we're done */ + ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE; + goto done; + } + else + { + /* The prefix is moving from one interface to another. + * delete the stale and add the new */ + ip6_add_del_interface_address (vm, + ia->sw_if_index, + address, address_length, 1); + ia = NULL; + error = ip_interface_address_add (lm, sw_if_index, + addr_fib, address_length, + &if_address_index); + } + } + else + { + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + error = clib_error_create + ("Prefix %U already found on interface %U", + lm->format_address_and_length, addr_fib, address_length, + format_vnet_sw_if_index_name, vnm, ia->sw_if_index); + } + } + else + error = ip_interface_address_add (lm, sw_if_index, + addr_fib, address_length, + &if_address_index); + } + + if (error) + goto done; ip6_sw_interface_enable_disable (sw_if_index, !is_del); + if (!is_del) + ip6_link_enable (sw_if_index, NULL); - if (is_del) - ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length); - else - ip6_add_interface_routes (vnm, sw_if_index, - im, ip6_af.fib_index, - pool_elt_at_index (lm->if_address_pool, - if_address_index)); + /* intf addr routes are added/deleted on admin up/down */ + if (vnet_sw_interface_is_admin_up (vnm, sw_if_index)) + { + if (is_del) + ip6_del_interface_routes (sw_if_index, + im, ip6_af.fib_index, address, + address_length); + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, + if_address_index)); + } - { - ip6_add_del_interface_address_callback_t *cb; - vec_foreach (cb, im->add_del_interface_address_callbacks) - cb->function (im, cb->function_opaque, sw_if_index, - address, address_length, if_address_index, is_del); - } + ip6_add_del_interface_address_callback_t *cb; + vec_foreach (cb, im->add_del_interface_address_callbacks) + cb->function (im, cb->function_opaque, sw_if_index, + address, address_length, if_address_index, is_del); + + if (is_del) + ip6_link_disable (sw_if_index); done: vec_free (addr_fib); return error; } -clib_error_t * +#endif + +static clib_error_t * ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { ip6_main_t *im = &ip6_main; @@ -279,7 +552,7 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) im, fib_index, ia); else - ip6_del_interface_routes (im, fib_index, + ip6_del_interface_routes (sw_if_index, im, fib_index, a, ia->address_length); })); /* *INDENT-ON* */ @@ -295,6 +568,7 @@ VNET_FEATURE_ARC_INIT (ip6_unicast, static) = { .arc_name = "ip6-unicast", .start_nodes = VNET_FEATURES ("ip6-input"), + .last_in_arc = "ip6-lookup", .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index, }; @@ -316,13 +590,13 @@ VNET_FEATURE_INIT (ip6_policer_classify, static) = { .arc_name = "ip6-unicast", .node_name = "ip6-policer-classify", - .runs_before = VNET_FEATURES ("ipsec-input-ip6"), + .runs_before = VNET_FEATURES ("ipsec6-input-feature"), }; VNET_FEATURE_INIT (ip6_ipsec, static) = { .arc_name = "ip6-unicast", - .node_name = "ipsec-input-ip6", + .node_name = "ipsec6-input-feature", .runs_before = VNET_FEATURES ("l2tp-decap"), }; @@ -366,6 +640,7 @@ VNET_FEATURE_ARC_INIT (ip6_multicast, static) = { .arc_name = "ip6-multicast", .start_nodes = VNET_FEATURES ("ip6-input"), + .last_in_arc = "ip6-mfib-forward-lookup", .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index, }; @@ -392,18 +667,19 @@ VNET_FEATURE_ARC_INIT (ip6_output, static) = { .arc_name = "ip6-output", .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain", "ip6-dvr-dpo"), + .last_in_arc = "interface-output", .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index, }; VNET_FEATURE_INIT (ip6_outacl, static) = { .arc_name = "ip6-output", .node_name = "ip6-outacl", - .runs_before = VNET_FEATURES ("ipsec-output-ip6"), + .runs_before = VNET_FEATURES ("ipsec6-output-feature"), }; VNET_FEATURE_INIT (ip6_ipsec_output, static) = { .arc_name = "ip6-output", - .node_name = "ipsec-output-ip6", + .node_name = "ipsec6-output-feature", .runs_before = VNET_FEATURES ("interface-output"), }; @@ -414,7 +690,7 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = { }; /* *INDENT-ON* */ -clib_error_t * +static clib_error_t * ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) { ip6_main_t *im = &ip6_main; @@ -431,7 +707,6 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) ip6_address_t *address; vlib_main_t *vm = vlib_get_main (); - ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ ); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); /* *INDENT-OFF* */ foreach_ip_interface_address (lm6, ia, sw_if_index, 0, @@ -454,9 +729,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del); -static uword -ip6_lookup (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip6_lookup_inline (vm, node, frame); } @@ -466,7 +741,6 @@ static u8 *format_ip6_lookup_trace (u8 * s, va_list * args); /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_lookup_node) = { - .function = ip6_lookup, .name = "ip6-lookup", .vector_size = sizeof (u32), .format_trace = format_ip6_lookup_trace, @@ -475,229 +749,185 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup); - -always_inline uword -ip6_load_balance (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; - u32 n_left_from, n_left_to_next, *from, *to_next; - ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); + u32 n_left, *from; + u32 thread_index = vm->thread_index; ip6_main_t *im = &ip6_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next; from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next = node->cached_next_index; + n_left = frame->n_vectors; + next = nexts; - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace (vm, node, frame, VLIB_TX); + vlib_get_buffers (vm, from, bufs, n_left); - while (n_left_from > 0) + while (n_left >= 4) { - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + const load_balance_t *lb0, *lb1; + const ip6_header_t *ip0, *ip1; + u32 lbi0, hc0, lbi1, hc1; + const dpo_id_t *dpo0, *dpo1; - - while (n_left_from >= 4 && n_left_to_next >= 2) + /* Prefetch next iteration. */ + { + vlib_prefetch_buffer_header (b[2], STORE); + vlib_prefetch_buffer_header (b[3], STORE); + + CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), STORE); + } + + ip0 = vlib_buffer_get_current (b[0]); + ip1 = vlib_buffer_get_current (b[1]); + lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX]; + lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); + + /* + * this node is for via FIBs we can re-use the hash value from the + * to node if present. + * We don't want to use the same hash value at each level in the recursion + * graph as that would lead to polarisation + */ + hc0 = hc1 = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) { - ip_lookup_next_t next0, next1; - const load_balance_t *lb0, *lb1; - vlib_buffer_t *p0, *p1; - u32 pi0, lbi0, hc0, pi1, lbi1, hc1; - const ip6_header_t *ip0, *ip1; - const dpo_id_t *dpo0, *dpo1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); - CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - - lb0 = load_balance_get (lbi0); - lb1 = load_balance_get (lbi1); - - /* - * this node is for via FIBs we can re-use the hash value from the - * to node if present. - * We don't want to use the same hash value at each level in the recursion - * graph as that would lead to polarisation - */ - hc0 = hc1 = 0; - - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash)) { - if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) - { - hc0 = vnet_buffer (p0)->ip.flow_hash = - vnet_buffer (p0)->ip.flow_hash >> 1; - } - else - { - hc0 = vnet_buffer (p0)->ip.flow_hash = - ip6_compute_flow_hash (ip0, lb0->lb_hash_config); - } - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hc0 & - lb0->lb_n_buckets_minus_1)); + hc0 = vnet_buffer (b[0])->ip.flow_hash = + vnet_buffer (b[0])->ip.flow_hash >> 1; } else { - dpo0 = load_balance_get_bucket_i (lb0, 0); + hc0 = vnet_buffer (b[0])->ip.flow_hash = + ip6_compute_flow_hash (ip0, lb0->lb_hash_config); } - if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash)) { - if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash)) - { - hc1 = vnet_buffer (p1)->ip.flow_hash = - vnet_buffer (p1)->ip.flow_hash >> 1; - } - else - { - hc1 = vnet_buffer (p1)->ip.flow_hash = - ip6_compute_flow_hash (ip1, lb1->lb_hash_config); - } - dpo1 = - load_balance_get_fwd_bucket (lb1, - (hc1 & - lb1->lb_n_buckets_minus_1)); + hc1 = vnet_buffer (b[1])->ip.flow_hash = + vnet_buffer (b[1])->ip.flow_hash >> 1; } else { - dpo1 = load_balance_get_bucket_i (lb1, 0); + hc1 = vnet_buffer (b[1])->ip.flow_hash = + ip6_compute_flow_hash (ip1, lb1->lb_hash_config); } + dpo1 = load_balance_get_fwd_bucket + (lb1, (hc1 & (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - - /* Only process the HBH Option Header if explicitly configured to do so */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ? - (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0; - } - /* Only process the HBH Option Header if explicitly configured to do so */ - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ? - (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1; - } - - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + next[0] = dpo0->dpoi_next_node; + next[1] = dpo1->dpoi_next_node; - vlib_validate_buffer_enqueue_x2 (vm, node, next, - to_next, n_left_to_next, - pi0, pi1, next0, next1); + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0]; } - - while (n_left_from > 0 && n_left_to_next > 0) + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) { - ip_lookup_next_t next0; - const load_balance_t *lb0; - vlib_buffer_t *p0; - u32 pi0, lbi0, hc0; - const ip6_header_t *ip0; - const dpo_id_t *dpo0; + next[1] = (dpo_is_adj (dpo1) && im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[1]; + } - pi0 = from[0]; - to_next[0] = pi0; - from += 1; - to_next += 1; - n_left_to_next -= 1; - n_left_from -= 1; + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - p0 = vlib_get_buffer (vm, pi0); + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0])); + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1])); - ip0 = vlib_buffer_get_current (p0); - lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + b += 2; + next += 2; + n_left -= 2; + } + + while (n_left > 0) + { + const load_balance_t *lb0; + const ip6_header_t *ip0; + const dpo_id_t *dpo0; + u32 lbi0, hc0; + + ip0 = vlib_buffer_get_current (b[0]); + lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX]; - lb0 = load_balance_get (lbi0); + lb0 = load_balance_get (lbi0); - hc0 = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + hc0 = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash)) { - if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) - { - hc0 = vnet_buffer (p0)->ip.flow_hash = - vnet_buffer (p0)->ip.flow_hash >> 1; - } - else - { - hc0 = vnet_buffer (p0)->ip.flow_hash = - ip6_compute_flow_hash (ip0, lb0->lb_hash_config); - } - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hc0 & - lb0->lb_n_buckets_minus_1)); + hc0 = vnet_buffer (b[0])->ip.flow_hash = + vnet_buffer (b[0])->ip.flow_hash >> 1; } else { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - /* Only process the HBH Option Header if explicitly configured to do so */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ? - (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0; + hc0 = vnet_buffer (b[0])->ip.flow_hash = + ip6_compute_flow_hash (ip0, lb0->lb_hash_config); } + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + next[0] = dpo0->dpoi_next_node; + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vlib_validate_buffer_enqueue_x1 (vm, node, next, - to_next, n_left_to_next, - pi0, next0); + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + next[0] = (dpo_is_adj (dpo0) && im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next[0]; } - vlib_put_next_frame (vm, node, next, n_left_to_next); + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0])); + + b += 1; + next += 1; + n_left -= 1; } + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + return frame->n_vectors; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_load_balance_node) = { - .function = ip6_load_balance, .name = "ip6-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip6-lookup", @@ -705,8 +935,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance); - typedef struct { /* Adjacency taken. */ @@ -719,6 +947,7 @@ typedef struct } ip6_forward_next_trace_t; +#ifndef CLIB_MARCH_VARIANT u8 * format_ip6_forward_next_trace (u8 * s, va_list * args) { @@ -727,11 +956,15 @@ format_ip6_forward_next_trace (u8 * s, va_list * args) ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "%U%U", + s = format (s, "%Ufib:%d adj:%d flow:%d", + format_white_space, indent, + t->fib_index, t->adj_index, t->flow_hash); + s = format (s, "\n%U%U", format_white_space, indent, format_ip6_header, t->packet_data, sizeof (t->packet_data)); return s; } +#endif static u8 * format_ip6_lookup_trace (u8 * s, va_list * args) @@ -764,11 +997,12 @@ format_ip6_rewrite_trace (u8 * s, va_list * args) s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - t->adj_index, t->packet_data, sizeof (t->packet_data)); + t->packet_data, sizeof (t->packet_data)); return s; } /* Common trace function for all ip6-forward next nodes. */ +#ifndef CLIB_MARCH_VARIANT void ip6_forward_next_trace (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -807,9 +1041,9 @@ ip6_forward_next_trace (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b0)->sw_if_index[VLIB_RX]); - clib_memcpy (t0->packet_data, - vlib_buffer_get_current (b0), - sizeof (t0->packet_data)); + clib_memcpy_fast (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); } if (b1->flags & VLIB_BUFFER_IS_TRACED) { @@ -822,9 +1056,9 @@ ip6_forward_next_trace (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b1)->sw_if_index[VLIB_RX]); - clib_memcpy (t1->packet_data, - vlib_buffer_get_current (b1), - sizeof (t1->packet_data)); + clib_memcpy_fast (t1->packet_data, + vlib_buffer_get_current (b1), + sizeof (t1->packet_data)); } from += 2; n_left -= 2; @@ -851,9 +1085,9 @@ ip6_forward_next_trace (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b0)->sw_if_index[VLIB_RX]); - clib_memcpy (t0->packet_data, - vlib_buffer_get_current (b0), - sizeof (t0->packet_data)); + clib_memcpy_fast (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); } from += 1; n_left -= 1; @@ -865,35 +1099,23 @@ u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp) { - ip_csum_t sum0; - u16 sum16, payload_length_host_byte_order; - u32 i, n_this_buffer, n_bytes_left; + ip_csum_t sum0 = 0; + u16 payload_length, payload_length_host_byte_order; + u32 i; u32 headers_size = sizeof (ip0[0]); - void *data_this_buffer; + u8 *data_this_buffer; + u8 next_hdr = ip0->protocol; ASSERT (bogus_lengthp); *bogus_lengthp = 0; - /* Initialize checksum with ip header. */ - sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); - data_this_buffer = (void *) (ip0 + 1); - - for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) - { - sum0 = ip_csum_with_carry (sum0, - clib_mem_unaligned (&ip0-> - src_address.as_uword[i], - uword)); - sum0 = - ip_csum_with_carry (sum0, - clib_mem_unaligned (&ip0->dst_address.as_uword[i], - uword)); - } + data_this_buffer = (u8 *) (ip0 + 1); + payload_length = ip0->payload_length; /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) * or UDP-Ping packets */ - if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + if (PREDICT_FALSE (next_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) { u32 skip_bytes; ip6_hop_by_hop_ext_t *ext_hdr = @@ -908,33 +1130,34 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, payload_length_host_byte_order -= skip_bytes; headers_size += skip_bytes; + + /* pseudo-header adjustments: + * exclude ext header bytes from payload length + * use payload IP proto rather than ext header IP proto + */ + payload_length = clib_host_to_net_u16 (payload_length_host_byte_order); + next_hdr = ext_hdr->next_hdr; } - n_bytes_left = n_this_buffer = payload_length_host_byte_order; - if (p0 && n_this_buffer + headers_size > p0->current_length) - n_this_buffer = - p0->current_length > - headers_size ? p0->current_length - headers_size : 0; - while (1) - { - sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); - n_bytes_left -= n_this_buffer; - if (n_bytes_left == 0) - break; + /* Initialize checksum with ip pseudo-header. */ + sum0 = payload_length + clib_host_to_net_u16 (next_hdr); - if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - *bogus_lengthp = 1; - return 0xfefe; - } - p0 = vlib_get_buffer (vm, p0->next_buffer); - data_this_buffer = vlib_buffer_get_current (p0); - n_this_buffer = p0->current_length; + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) + { + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword)); + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword)); } - sum16 = ~ip_csum_fold (sum0); - - return sum16; + if (p0) + return ip_calculate_l4_checksum (vm, p0, sum0, + payload_length_host_byte_order, + (u8 *) ip0, headers_size, NULL); + else + return ip_calculate_l4_checksum (vm, 0, sum0, + payload_length_host_byte_order, NULL, 0, + data_this_buffer); } u32 @@ -966,6 +1189,7 @@ ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } +#endif /** * @brief returns number of links on which src is reachable. @@ -983,7 +1207,7 @@ ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i) (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX]; - lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address); + lbi = ip6_fib_table_fwding_lookup (fib_index, &i->src_address); lb0 = load_balance_get (lbi); return (fib_urpf_check_size (lb0->lb_urpf)); @@ -1011,774 +1235,504 @@ VNET_FEATURE_ARC_INIT (ip6_local) = }; /* *INDENT-ON* */ -static uword -ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, int head_of_feature_arc) +static_always_inline u8 +ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0) { - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - ip_local_next_t next_index; - u32 *from, *to_next, n_left_from, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_input_node.index); - u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace (vm, node, frame, VLIB_TX); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *p0, *p1; - ip6_header_t *ip0, *ip1; - udp_header_t *udp0, *udp1; - u32 pi0, ip_len0, udp_len0, flags0, next0; - u32 pi1, ip_len1, udp_len1, flags1, next1; - i32 len_diff0, len_diff1; - u8 error0, type0, good_l4_csum0, is_tcp_udp0; - u8 error1, type1, good_l4_csum1, is_tcp_udp1; - u32 udp_offset0, udp_offset1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - - if (head_of_feature_arc == 0) - goto skip_checks; - - vnet_buffer (p0)->l3_hdr_offset = p0->current_data; - vnet_buffer (p1)->l3_hdr_offset = p1->current_data; - type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; - type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol]; + u16 payload_length_host_byte_order; + u32 n_this_buffer, n_bytes_left; + ip6_header_t *ip0 = vlib_buffer_get_current (p0); + u32 headers_size = sizeof (ip0[0]); + u8 *data_this_buffer; - flags0 = p0->flags; - flags1 = p1->flags; - is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0); - is_tcp_udp1 = ip6_next_proto_is_tcp_udp (p1, ip1, &udp_offset1); + data_this_buffer = (u8 *) (ip0 + 1); - good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (flags0 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || flags0 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) - != 0; - good_l4_csum1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (flags1 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || flags1 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) - != 0; - len_diff0 = 0; - len_diff1 = 0; + ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *) data_this_buffer; - if (PREDICT_TRUE (is_tcp_udp0)) - { - udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0); - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP - && udp0->checksum == 0; - /* Verify UDP length. */ - if (is_tcp_udp0 == IP_PROTOCOL_UDP) - { - ip_len0 = clib_net_to_host_u16 (ip0->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - len_diff0 = ip_len0 - udp_len0; - } - } - if (PREDICT_TRUE (is_tcp_udp1)) - { - udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1); - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP - && udp1->checksum == 0; - /* Verify UDP length. */ - if (is_tcp_udp1 == IP_PROTOCOL_UDP) - { - ip_len1 = clib_net_to_host_u16 (ip1->payload_length); - udp_len1 = clib_net_to_host_u16 (udp1->length); - len_diff1 = ip_len1 - udp_len1; - } - } + /* validate really icmp6 next */ - good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; - good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN; + if (!(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6) + || (ext_hdr->next_hdr == IP_PROTOCOL_UDP)) + return 0; - len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; - len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0; - if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN - && !good_l4_csum0 - && !(flags0 & - VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) - { - flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); - good_l4_csum0 = - (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - } - if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN - && !good_l4_csum1 - && !(flags1 & - VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) - { - flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1); - good_l4_csum1 = - (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - } - - error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; - error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; - error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1; - - ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == - IP6_ERROR_UDP_CHECKSUM); - ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == - IP6_ERROR_ICMP_CHECKSUM); - error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0); - error1 = (!good_l4_csum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1); - - /* Drop packets from unroutable hosts. */ - /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && - type0 != IP_BUILTIN_PROTOCOL_ICMP && - !ip6_address_is_link_local_unicast (&ip0->src_address)) - { - error0 = (!ip6_urpf_loose_check (im, p0, ip0) - ? IP6_ERROR_SRC_LOOKUP_MISS : error0); - } - if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && - type1 != IP_BUILTIN_PROTOCOL_ICMP && - !ip6_address_is_link_local_unicast (&ip1->src_address)) - { - error1 = (!ip6_urpf_loose_check (im, p1, ip1) - ? IP6_ERROR_SRC_LOOKUP_MISS : error1); - } + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); + n_bytes_left = n_this_buffer = payload_length_host_byte_order; - /* TODO maybe move to lookup? */ - vnet_buffer (p0)->ip.fib_index = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - vnet_buffer (p0)->ip.fib_index = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? vnet_buffer (p0)->ip. - fib_index : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - vnet_buffer (p1)->ip.fib_index = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - vnet_buffer (p1)->ip.fib_index = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? vnet_buffer (p1)->ip. - fib_index : vnet_buffer (p1)->sw_if_index[VLIB_TX]; + u32 n_ip_bytes_this_buffer = + p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); + if (n_this_buffer + headers_size > n_ip_bytes_this_buffer) + { + n_this_buffer = p0->current_length > headers_size ? + n_ip_bytes_this_buffer - headers_size : 0; + } + n_bytes_left -= n_this_buffer; + n_bytes_left -= p0->total_length_not_including_first_buffer; - skip_checks: + if (n_bytes_left == 0) + return 0; + else + return 1; +} - next0 = lm->local_next_by_ip_protocol[ip0->protocol]; - next1 = lm->local_next_by_ip_protocol[ip1->protocol]; - next0 = - error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; - next1 = - error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; +always_inline uword +ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) +{ + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 *from, n_left_from; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_input_node.index); + u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 nexts[VLIB_FRAME_SIZE], *next; - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; - if (head_of_feature_arc) - { - if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, - vnet_buffer (p0)->sw_if_index - [VLIB_RX], &next0, p0); - if (PREDICT_TRUE (error1 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, - vnet_buffer (p1)->sw_if_index - [VLIB_RX], &next1, p1); - } + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - pi0, pi1, next0, next1); - } + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; - while (n_left_from > 0 && n_left_to_next > 0) + while (n_left_from > 2) + { + /* Prefetch next iteration. */ + if (n_left_from >= 6) { - vlib_buffer_t *p0; - ip6_header_t *ip0; - udp_header_t *udp0; - u32 pi0, ip_len0, udp_len0, flags0, next0; - i32 len_diff0; - u8 error0, type0, good_l4_csum0; - u32 udp_offset0; - u8 is_tcp_udp0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - error0 = IP6_ERROR_UNKNOWN_PROTOCOL; - - p0 = vlib_get_buffer (vm, pi0); - ip0 = vlib_buffer_get_current (p0); - - if (head_of_feature_arc == 0) - goto skip_check; + vlib_prefetch_buffer_header (b[4], STORE); + vlib_prefetch_buffer_header (b[5], STORE); + vlib_prefetch_buffer_data (b[2], LOAD); + vlib_prefetch_buffer_data (b[3], LOAD); + } - vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + ip6_error_t error[2]; + error[0] = IP6_ERROR_UNKNOWN_PROTOCOL; + error[1] = IP6_ERROR_UNKNOWN_PROTOCOL; - type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; - flags0 = p0->flags; - is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0); - good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (flags0 & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || flags0 & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) - != 0; + ip6_header_t *ip[2]; + ip[0] = vlib_buffer_get_current (b[0]); + ip[1] = vlib_buffer_get_current (b[1]); - len_diff0 = 0; - if (PREDICT_TRUE (is_tcp_udp0)) + if (head_of_feature_arc) + { + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; + vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data; + + u8 type[2]; + type[0] = lm->builtin_protocol_by_ip_protocol[ip[0]->protocol]; + type[1] = lm->builtin_protocol_by_ip_protocol[ip[1]->protocol]; + + u32 flags[2]; + flags[0] = b[0]->flags; + flags[1] = b[1]->flags; + + u32 good_l4_csum[2]; + good_l4_csum[0] = + flags[0] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM); + good_l4_csum[1] = + flags[1] & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM); + + u32 udp_offset[2] = { }; + u8 is_tcp_udp[2]; + is_tcp_udp[0] = + ip6_next_proto_is_tcp_udp (b[0], ip[0], &udp_offset[0]); + is_tcp_udp[1] = + ip6_next_proto_is_tcp_udp (b[1], ip[1], &udp_offset[1]); + i16 len_diff[2] = { 0 }; + if (PREDICT_TRUE (is_tcp_udp[0])) { - udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0); - /* Don't verify UDP checksum for packets with explicit zero - * checksum. */ - good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP - && udp0->checksum == 0; - /* Verify UDP length. */ - if (is_tcp_udp0 == IP_PROTOCOL_UDP) - { - ip_len0 = clib_net_to_host_u16 (ip0->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - len_diff0 = ip_len0 - udp_len0; - } + udp_header_t *udp = + (udp_header_t *) ((u8 *) ip[0] + udp_offset[0]); + good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UDP + && udp->checksum == 0; + /* optimistically verify UDP length. */ + u16 ip_len, udp_len; + ip_len = clib_net_to_host_u16 (ip[0]->payload_length); + udp_len = clib_net_to_host_u16 (udp->length); + len_diff[0] = ip_len - udp_len; } - - good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; - len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; - - if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN - && !good_l4_csum0 - && !(flags0 & - VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) + if (PREDICT_TRUE (is_tcp_udp[1])) { - flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); - good_l4_csum0 = - (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + udp_header_t *udp = + (udp_header_t *) ((u8 *) ip[1] + udp_offset[1]); + good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UDP + && udp->checksum == 0; + /* optimistically verify UDP length. */ + u16 ip_len, udp_len; + ip_len = clib_net_to_host_u16 (ip[1]->payload_length); + udp_len = clib_net_to_host_u16 (udp->length); + len_diff[1] = ip_len - udp_len; } - error0 = IP6_ERROR_UNKNOWN_PROTOCOL; - error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; + good_l4_csum[0] |= type[0] == IP_BUILTIN_PROTOCOL_UNKNOWN; + good_l4_csum[1] |= type[1] == IP_BUILTIN_PROTOCOL_UNKNOWN; - ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == - IP6_ERROR_UDP_CHECKSUM); - ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == - IP6_ERROR_ICMP_CHECKSUM); - error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0); + len_diff[0] = type[0] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[0] : 0; + len_diff[1] = type[1] == IP_BUILTIN_PROTOCOL_UDP ? len_diff[1] : 0; - /* If this is a neighbor solicitation (ICMP), skip src RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && - type0 != IP_BUILTIN_PROTOCOL_ICMP && - !ip6_address_is_link_local_unicast (&ip0->src_address)) + u8 need_csum[2]; + need_csum[0] = type[0] != IP_BUILTIN_PROTOCOL_UNKNOWN + && !good_l4_csum[0] + && !(flags[0] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + need_csum[1] = type[1] != IP_BUILTIN_PROTOCOL_UNKNOWN + && !good_l4_csum[1] + && !(flags[1] & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (PREDICT_FALSE (need_csum[0])) { - error0 = (!ip6_urpf_loose_check (im, p0, ip0) - ? IP6_ERROR_SRC_LOOKUP_MISS : error0); + flags[0] = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]); + good_l4_csum[0] = flags[0] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT; + error[0] = IP6_ERROR_UNKNOWN_PROTOCOL; } - - vnet_buffer (p0)->ip.fib_index = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - vnet_buffer (p0)->ip.fib_index = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? vnet_buffer (p0)->ip. - fib_index : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - skip_check: - - next0 = lm->local_next_by_ip_protocol[ip0->protocol]; - next0 = - error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; - - p0->error = error_node->errors[error0]; - - if (head_of_feature_arc) + else { - if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, - vnet_buffer (p0)->sw_if_index - [VLIB_RX], &next0, p0); + if (ip6_tcp_udp_icmp_bad_length (vm, b[0])) + error[0] = IP6_ERROR_BAD_LENGTH; + } + if (PREDICT_FALSE (need_csum[1])) + { + flags[1] = ip6_tcp_udp_icmp_validate_checksum (vm, b[1]); + good_l4_csum[1] = flags[1] & VNET_BUFFER_F_L4_CHECKSUM_CORRECT; + error[1] = IP6_ERROR_UNKNOWN_PROTOCOL; + } + else + { + if (ip6_tcp_udp_icmp_bad_length (vm, b[1])) + error[1] = IP6_ERROR_BAD_LENGTH; } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - pi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; -} - -static uword -ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ ); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_local_node, static) = -{ - .function = ip6_local, - .name = "ip6-local", - .vector_size = sizeof (u32), - .format_trace = format_ip6_forward_next_trace, - .n_next_nodes = IP_LOCAL_N_NEXT, - .next_nodes = - { - [IP_LOCAL_NEXT_DROP] = "ip6-drop", - [IP_LOCAL_NEXT_PUNT] = "ip6-punt", - [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local); - - -static uword -ip6_local_end_of_arc (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ ); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = { - .function = ip6_local_end_of_arc, - .name = "ip6-local-end-of-arc", - .vector_size = sizeof (u32), - - .format_trace = format_ip6_forward_next_trace, - .sibling_of = "ip6-local", -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc) - -VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { - .arc_name = "ip6-local", - .node_name = "ip6-local-end-of-arc", - .runs_before = 0, /* not before any other features */ -}; -/* *INDENT-ON* */ - -void -ip6_register_protocol (u32 protocol, u32 node_index) -{ - vlib_main_t *vm = vlib_get_main (); - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - - ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); - lm->local_next_by_ip_protocol[protocol] = - vlib_node_add_next (vm, ip6_local_node.index, node_index); -} - -typedef enum -{ - IP6_DISCOVER_NEIGHBOR_NEXT_DROP, - IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX, - IP6_DISCOVER_NEIGHBOR_N_NEXT, -} ip6_discover_neighbor_next_t; - -typedef enum -{ - IP6_DISCOVER_NEIGHBOR_ERROR_DROP, - IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT, - IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS, -} ip6_discover_neighbor_error_t; - -static uword -ip6_discover_neighbor_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_glean) -{ - vnet_main_t *vnm = vnet_get_main (); - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - u32 *from, *to_next_drop; - uword n_left_from, n_left_to_next_drop; - static f64 time_last_seed_change = -1e100; - static u32 hash_seeds[3]; - static uword hash_bitmap[256 / BITS (uword)]; - f64 time_now; - int bogus_length; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace (vm, node, frame, VLIB_TX); - - time_now = vlib_time_now (vm); - if (time_now - time_last_seed_change > 1e-3) - { - uword i; - u32 *r = clib_random_buffer_get_data (&vm->random_buffer, - sizeof (hash_seeds)); - for (i = 0; i < ARRAY_LEN (hash_seeds); i++) - hash_seeds[i] = r[i]; - - /* Mark all hash keys as been not-seen before. */ - for (i = 0; i < ARRAY_LEN (hash_bitmap); i++) - hash_bitmap[i] = 0; - - time_last_seed_change = time_now; - } - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, - to_next_drop, n_left_to_next_drop); - - while (n_left_from > 0 && n_left_to_next_drop > 0) - { - vlib_buffer_t *p0; - ip6_header_t *ip0; - u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; - uword bm0; - ip_adjacency_t *adj0; - vnet_hw_interface_t *hw_if0; - u32 next0; - pi0 = from[0]; + error[0] = len_diff[0] < 0 ? IP6_ERROR_UDP_LENGTH : error[0]; - p0 = vlib_get_buffer (vm, pi0); + error[1] = len_diff[1] < 0 ? IP6_ERROR_UDP_LENGTH : error[1]; - adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == + IP6_ERROR_UDP_CHECKSUM, + "Wrong IP6 errors constants"); + STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == + IP6_ERROR_ICMP_CHECKSUM, + "Wrong IP6 errors constants"); - ip0 = vlib_buffer_get_current (p0); + error[0] = + !good_l4_csum[0] ? IP6_ERROR_UDP_CHECKSUM + type[0] : error[0]; + error[1] = + !good_l4_csum[1] ? IP6_ERROR_UDP_CHECKSUM + type[1] : error[1]; - adj0 = adj_get (adj_index0); + /* Drop packets from unroutable hosts. */ + /* If this is a neighbor solicitation (ICMP), skip source RPF check */ + u8 unroutable[2]; + unroutable[0] = error[0] == IP6_ERROR_UNKNOWN_PROTOCOL + && type[0] != IP_BUILTIN_PROTOCOL_ICMP + && !ip6_address_is_link_local_unicast (&ip[0]->src_address); + unroutable[1] = error[1] == IP6_ERROR_UNKNOWN_PROTOCOL + && type[1] != IP_BUILTIN_PROTOCOL_ICMP + && !ip6_address_is_link_local_unicast (&ip[1]->src_address); + if (PREDICT_FALSE (unroutable[0])) + { + error[0] = + !ip6_urpf_loose_check (im, b[0], + ip[0]) ? IP6_ERROR_SRC_LOOKUP_MISS + : error[0]; + } + if (PREDICT_FALSE (unroutable[1])) + { + error[1] = + !ip6_urpf_loose_check (im, b[1], + ip[1]) ? IP6_ERROR_SRC_LOOKUP_MISS + : error[1]; + } - if (!is_glean) + vnet_buffer (b[0])->ip.fib_index = + vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[0])->sw_if_index[VLIB_TX] : + vnet_buffer (b[0])->ip.fib_index; + vnet_buffer (b[1])->ip.fib_index = + vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[1])->sw_if_index[VLIB_TX] : + vnet_buffer (b[1])->ip.fib_index; + } /* head_of_feature_arc */ + + next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; + next[0] = + error[0] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0]; + next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol]; + next[1] = + error[1] != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[1]; + + b[0]->error = error_node->errors[error[0]]; + b[1]->error = error_node->errors[error[1]]; + + if (head_of_feature_arc) + { + u8 ip6_unknown[2]; + ip6_unknown[0] = error[0] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL; + ip6_unknown[1] = error[1] == (u8) IP6_ERROR_UNKNOWN_PROTOCOL; + if (PREDICT_TRUE (ip6_unknown[0])) { - ip0->dst_address.as_u64[0] = - adj0->sub_type.nbr.next_hop.ip6.as_u64[0]; - ip0->dst_address.as_u64[1] = - adj0->sub_type.nbr.next_hop.ip6.as_u64[1]; + u32 next32 = next[0]; + vnet_feature_arc_start (arc_index, + vnet_buffer (b[0])->sw_if_index + [VLIB_RX], &next32, b[0]); + next[0] = next32; } + if (PREDICT_TRUE (ip6_unknown[1])) + { + u32 next32 = next[1]; + vnet_feature_arc_start (arc_index, + vnet_buffer (b[1])->sw_if_index + [VLIB_RX], &next32, b[1]); + next[1] = next32; + } + } + + /* next */ + b += 2; + next += 2; + n_left_from -= 2; + } - a0 = hash_seeds[0]; - b0 = hash_seeds[1]; - c0 = hash_seeds[2]; + while (n_left_from) + { + u8 error; + error = IP6_ERROR_UNKNOWN_PROTOCOL; - sw_if_index0 = adj0->rewrite_header.sw_if_index; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + ip6_header_t *ip; + ip = vlib_buffer_get_current (b[0]); - a0 ^= sw_if_index0; - b0 ^= ip0->dst_address.as_u32[0]; - c0 ^= ip0->dst_address.as_u32[1]; + if (head_of_feature_arc) + { + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; + u8 type = lm->builtin_protocol_by_ip_protocol[ip->protocol]; + + u32 flags = b[0]->flags; + u32 good_l4_csum = + flags & (VNET_BUFFER_F_L4_CHECKSUM_CORRECT | + VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM); + + u32 udp_offset; + i16 len_diff = 0; + u8 is_tcp_udp = ip6_next_proto_is_tcp_udp (b[0], ip, &udp_offset); + if (PREDICT_TRUE (is_tcp_udp)) + { + udp_header_t *udp = (udp_header_t *) ((u8 *) ip + udp_offset); + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UDP + && udp->checksum == 0; + /* optimistically verify UDP length. */ + u16 ip_len, udp_len; + ip_len = clib_net_to_host_u16 (ip->payload_length); + udp_len = clib_net_to_host_u16 (udp->length); + len_diff = ip_len - udp_len; + } - hash_v3_mix32 (a0, b0, c0); + good_l4_csum |= type == IP_BUILTIN_PROTOCOL_UNKNOWN; + len_diff = type == IP_BUILTIN_PROTOCOL_UDP ? len_diff : 0; - b0 ^= ip0->dst_address.as_u32[2]; - c0 ^= ip0->dst_address.as_u32[3]; + u8 need_csum = type != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_csum + && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); + if (PREDICT_FALSE (need_csum)) + { + flags = ip6_tcp_udp_icmp_validate_checksum (vm, b[0]); + good_l4_csum = flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT; + error = IP6_ERROR_UNKNOWN_PROTOCOL; + } + else + { + if (ip6_tcp_udp_icmp_bad_length (vm, b[0])) + error = IP6_ERROR_BAD_LENGTH; + } - hash_v3_finalize32 (a0, b0, c0); - c0 &= BITS (hash_bitmap) - 1; - c0 = c0 / BITS (uword); - m0 = (uword) 1 << (c0 % BITS (uword)); - bm0 = hash_bitmap[c0]; - drop0 = (bm0 & m0) != 0; + error = len_diff < 0 ? IP6_ERROR_UDP_LENGTH : error; - /* Mark it as seen. */ - hash_bitmap[c0] = bm0 | m0; + STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == + IP6_ERROR_UDP_CHECKSUM, + "Wrong IP6 errors constants"); + STATIC_ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == + IP6_ERROR_ICMP_CHECKSUM, + "Wrong IP6 errors constants"); - from += 1; - n_left_from -= 1; - to_next_drop[0] = pi0; - to_next_drop += 1; - n_left_to_next_drop -= 1; + error = !good_l4_csum ? IP6_ERROR_UDP_CHECKSUM + type : error; - hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + /* Drop packets from unroutable hosts. */ + /* If this is a neighbor solicitation (ICMP), skip source RPF check */ + u8 unroutable = error == IP6_ERROR_UNKNOWN_PROTOCOL + && type != IP_BUILTIN_PROTOCOL_ICMP + && !ip6_address_is_link_local_unicast (&ip->src_address); + if (PREDICT_FALSE (unroutable)) + { + error = + !ip6_urpf_loose_check (im, b[0], + ip) ? IP6_ERROR_SRC_LOOKUP_MISS : + error; + } - /* If the interface is link-down, drop the pkt */ - if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) - drop0 = 1; + vnet_buffer (b[0])->ip.fib_index = + vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[0])->sw_if_index[VLIB_TX] : + vnet_buffer (b[0])->ip.fib_index; + } /* head_of_feature_arc */ - p0->error = - node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP - : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]; - if (drop0) - continue; + next[0] = lm->local_next_by_ip_protocol[ip->protocol]; + next[0] = + error != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next[0]; - /* - * the adj has been updated to a rewrite but the node the DPO that got - * us here hasn't - yet. no big deal. we'll drop while we wait. - */ - if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) - continue; + b[0]->error = error_node->errors[error]; - { - u32 bi0 = 0; - icmp6_neighbor_solicitation_header_t *h0; - vlib_buffer_t *b0; - - h0 = vlib_packet_template_get_packet - (vm, &im->discover_neighbor_packet_template, &bi0); - - /* - * Build ethernet header. - * Choose source address based on destination lookup - * adjacency. - */ - if (!ip6_src_address_for_packet (lm, - sw_if_index0, - &ip0->dst_address, - &h0->ip.src_address)) - { - /* There is no address on the interface */ - p0->error = - node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; - vlib_buffer_free (vm, &bi0, 1); - continue; - } - - /* - * Destination address is a solicited node multicast address. - * We need to fill in - * the low 24 bits with low 24 bits of target's address. - */ - h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13]; - h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14]; - h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15]; - - h0->neighbor.target_address = ip0->dst_address; - - clib_memcpy (h0->link_layer_option.ethernet_address, - hw_if0->hw_address, vec_len (hw_if0->hw_address)); - - /* $$$$ appears we need this; why is the checksum non-zero? */ - h0->neighbor.icmp.checksum = 0; - h0->neighbor.icmp.checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, - &bogus_length); - - ASSERT (bogus_length == 0); - - vlib_buffer_copy_trace_flag (vm, p0, bi0); - b0 = vlib_get_buffer (vm, bi0); - vnet_buffer (b0)->sw_if_index[VLIB_TX] - = vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - /* Add rewrite/encap string. */ - vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); - vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); - - next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX; - - vlib_set_next_frame_buffer (vm, node, next0, bi0); - } + if (head_of_feature_arc) + { + if (PREDICT_TRUE (error == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) + { + u32 next32 = next[0]; + vnet_feature_arc_start (arc_index, + vnet_buffer (b[0])->sw_if_index + [VLIB_RX], &next32, b[0]); + next[0] = next32; + } } - vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, - n_left_to_next_drop); + /* next */ + b += 1; + next += 1; + n_left_from -= 1; } + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; } -static uword -ip6_discover_neighbor (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return (ip6_discover_neighbor_inline (vm, node, frame, 0)); -} - -static uword -ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return (ip6_discover_neighbor_inline (vm, node, frame, 1)); + return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ ); } -static char *ip6_discover_neighbor_error_strings[] = { - [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", - [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent", - [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS] - = "no source address for ND solicitation", -}; - /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = +VLIB_REGISTER_NODE (ip6_local_node) = { - .function = ip6_discover_neighbor, - .name = "ip6-discover-neighbor", + .name = "ip6-local", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, - .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), - .error_strings = ip6_discover_neighbor_error_strings, - .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .n_next_nodes = IP_LOCAL_N_NEXT, .next_nodes = { - [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "ip6-drop", - [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + [IP_LOCAL_NEXT_DROP] = "ip6-drop", + [IP_LOCAL_NEXT_PUNT] = "ip6-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input", + [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly", }, }; /* *INDENT-ON* */ -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_glean_node) = +VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - .function = ip6_glean, - .name = "ip6-glean", + return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = { + .name = "ip6-local-end-of-arc", .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, - .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), - .error_strings = ip6_discover_neighbor_error_strings, - .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, - .next_nodes = - { - [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "ip6-drop", - [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", - }, + .sibling_of = "ip6-local", +}; + +VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { + .arc_name = "ip6-local", + .node_name = "ip6-local-end-of-arc", + .runs_before = 0, /* not before any other features */ }; /* *INDENT-ON* */ -clib_error_t * -ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) +#ifdef CLIB_MARCH_VARIANT +extern vlib_node_registration_t ip6_local_node; +#else +void +ip6_register_protocol (u32 protocol, u32 node_index) { - vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); ip6_main_t *im = &ip6_main; - icmp6_neighbor_solicitation_header_t *h; - ip6_address_t *src; - ip_interface_address_t *ia; - ip_adjacency_t *adj; - vnet_hw_interface_t *hi; - vnet_sw_interface_t *si; - vlib_buffer_t *b; - adj_index_t ai; - u32 bi = 0; - int bogus_length; - - si = vnet_get_sw_interface (vnm, sw_if_index); - - if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) - { - return clib_error_return (0, "%U: interface %U down", - format_ip6_address, dst, - format_vnet_sw_if_index_name, vnm, - sw_if_index); - } - - src = - ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia); - if (!src) - { - vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; - return clib_error_return - (0, "no matching interface address for destination %U (interface %U)", - format_ip6_address, dst, - format_vnet_sw_if_index_name, vnm, sw_if_index); - } - - h = - vlib_packet_template_get_packet (vm, - &im->discover_neighbor_packet_template, - &bi); - - hi = vnet_get_sup_hw_interface (vnm, sw_if_index); - - /* Destination address is a solicited node multicast address. We need to fill in - the low 24 bits with low 24 bits of target's address. */ - h->ip.dst_address.as_u8[13] = dst->as_u8[13]; - h->ip.dst_address.as_u8[14] = dst->as_u8[14]; - h->ip.dst_address.as_u8[15] = dst->as_u8[15]; - - h->ip.src_address = src[0]; - h->neighbor.target_address = dst[0]; - - if (PREDICT_FALSE (!hi->hw_address)) - { - return clib_error_return (0, "%U: interface %U do not support ip probe", - format_ip6_address, dst, - format_vnet_sw_if_index_name, vnm, - sw_if_index); - } - - clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, - vec_len (hi->hw_address)); - - h->neighbor.icmp.checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); - ASSERT (bogus_length == 0); - - b = vlib_get_buffer (vm, bi); - vnet_buffer (b)->sw_if_index[VLIB_RX] = - vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; - - /* Add encapsulation string for software interface (e.g. ethernet header). */ - ip46_address_t nh = { - .ip6 = *dst, - }; - - ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, - VNET_LINK_IP6, &nh, sw_if_index); - adj = adj_get (ai); - - /* Peer has been previously resolved, retrieve glean adj instead */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) - { - adj_unlock (ai); - ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, - VNET_LINK_IP6, sw_if_index, &nh); - adj = adj_get (ai); - } + ip_lookup_main_t *lm = &im->lookup_main; - vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); - vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = + vlib_node_add_next (vm, ip6_local_node.index, node_index); +} - { - vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); - u32 *to_next = vlib_frame_vector_args (f); - to_next[0] = bi; - f->n_vectors = 1; - vlib_put_frame_to_node (vm, hi->output_node_index, f); - } +void +ip6_unregister_protocol (u32 protocol) +{ + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; - adj_unlock (ai); - return /* no error */ 0; + ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT; } +#endif typedef enum { IP6_REWRITE_NEXT_DROP, IP6_REWRITE_NEXT_ICMP_ERROR, + IP6_REWRITE_NEXT_FRAGMENT, + IP6_REWRITE_N_NEXT /* Last */ } ip6_rewrite_next_t; +/** + * This bits of an IPv6 address to mask to construct a multicast + * MAC address + */ +#define IP6_MCAST_ADDR_MASK 0xffffffff + +always_inline void +ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes, + u16 adj_packet_bytes, bool is_locally_generated, + u32 * next, u8 is_midchain, u32 * error) +{ + if (adj_packet_bytes >= 1280 && packet_bytes > adj_packet_bytes) + { + if (is_locally_generated) + { + /* IP fragmentation */ + ip_frag_set_vnet_buffer (b, adj_packet_bytes, + (is_midchain ? + IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN : + IP_FRAG_NEXT_IP_REWRITE), 0); + *next = IP6_REWRITE_NEXT_FRAGMENT; + *error = IP6_ERROR_MTU_EXCEEDED; + } + else + { + *error = IP6_ERROR_MTU_EXCEEDED; + icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0, + adj_packet_bytes); + *next = IP6_REWRITE_NEXT_ICMP_ERROR; + } + } +} + always_inline uword -ip6_rewrite_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int do_counters, int is_midchain, int is_mcast) +ip6_rewrite_inline_with_gso (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int do_counters, int is_midchain, int is_mcast) { ip_lookup_main_t *lm = &ip6_main.lookup_main; u32 *from = vlib_frame_vector_args (frame); @@ -1788,7 +1742,7 @@ ip6_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; while (n_left_from > 0) { @@ -1796,12 +1750,13 @@ ip6_rewrite_inline (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { - ip_adjacency_t *adj0, *adj1; + const ip_adjacency_t *adj0, *adj1; vlib_buffer_t *p0, *p1; ip6_header_t *ip0, *ip1; u32 pi0, rw_len0, next0, error0, adj_index0; u32 pi1, rw_len1, next1, error1, adj_index1; u32 tx_sw_if_index0, tx_sw_if_index1; + bool is_locally_originated0, is_locally_originated1; /* Prefetch next iteration. */ { @@ -1840,7 +1795,9 @@ ip6_rewrite_inline (vlib_main_t * vm, error0 = error1 = IP6_ERROR_NONE; next0 = next1 = IP6_REWRITE_NEXT_DROP; - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + is_locally_originated0 = + p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated0)) { i32 hop_limit0 = ip0->hop_limit; @@ -1865,11 +1822,10 @@ ip6_rewrite_inline (vlib_main_t * vm, 0); } } - else - { - p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; - } - if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + + is_locally_originated1 = + p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated1)) { i32 hop_limit1 = ip1->hop_limit; @@ -1894,10 +1850,7 @@ ip6_rewrite_inline (vlib_main_t * vm, 0); } } - else - { - p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + adj0 = adj_get (adj_index0); adj1 = adj_get (adj_index1); @@ -1919,19 +1872,30 @@ ip6_rewrite_inline (vlib_main_t * vm, } /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : - error0); - error1 = - (vlib_buffer_length_in_chain (vm, p1) > - adj1[0]. - rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : - error1); + u16 ip0_len = + clib_net_to_host_u16 (ip0->payload_length) + + sizeof (ip6_header_t); + u16 ip1_len = + clib_net_to_host_u16 (ip1->payload_length) + + sizeof (ip6_header_t); + if (p0->flags & VNET_BUFFER_F_GSO) + ip0_len = gso_mtu_sz (p0); + if (p1->flags & VNET_BUFFER_F_GSO) + ip1_len = gso_mtu_sz (p1); + + + + ip6_mtu_check (p0, ip0_len, + adj0[0].rewrite_header.max_l3_packet_bytes, + is_locally_originated0, &next0, is_midchain, + &error0); + ip6_mtu_check (p1, ip1_len, + adj1[0].rewrite_header.max_l3_packet_bytes, + is_locally_originated1, &next1, is_midchain, + &error1); /* Don't adjust the buffer for hop count issue; icmp-error node - * wants to see the IP headerr */ + * wants to see the IP header */ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE)) { p0->current_data -= rw_len0; @@ -1943,8 +1907,13 @@ ip6_rewrite_inline (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next0, p0); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0, + adj0->ia_cfg_index); + } + else + { + p0->error = error_node->errors[error0]; } if (PREDICT_TRUE (error1 == IP6_ERROR_NONE)) { @@ -1957,28 +1926,57 @@ ip6_rewrite_inline (vlib_main_t * vm, if (PREDICT_FALSE (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index1, &next1, p1); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1, + adj1->ia_cfg_index); + } + else + { + p1->error = error_node->errors[error1]; } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_two_headers (adj0[0], adj1[0], - ip0, ip1, sizeof (ethernet_header_t)); + if (is_midchain) + { + /* before we paint on the next header, update the L4 + * checksums if required, since there's no offload on a tunnel */ + vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , + 1 /* is_ip6 */ ); + vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ , + 1 /* is_ip6 */ ); + + /* Guess we are only writing on ipv6 header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ip6_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); if (is_midchain) { - adj0->sub_type.midchain.fixup_func - (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); - adj1->sub_type.midchain.fixup_func - (vm, adj1, p1, adj1->sub_type.midchain.fixup_data); + if (adj0->sub_type.midchain.fixup_func) + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + if (adj1->sub_type.midchain.fixup_func) + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj1->sub_type.midchain.fixup_data); } if (is_mcast) { /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); - vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32[3], + (u8 *) ip0); + vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK, + adj1-> + rewrite_header.dst_mcast_offset, + &ip1->dst_address.as_u32[3], + (u8 *) ip1); } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -1994,6 +1992,7 @@ ip6_rewrite_inline (vlib_main_t * vm, u32 pi0, rw_len0; u32 adj_index0, next0, error0; u32 tx_sw_if_index0; + bool is_locally_originated0; pi0 = to_next[0] = from[0]; @@ -2009,7 +2008,9 @@ ip6_rewrite_inline (vlib_main_t * vm, next0 = IP6_REWRITE_NEXT_DROP; /* Check hop limit */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + is_locally_originated0 = + p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED; + if (PREDICT_TRUE (!is_locally_originated0)) { i32 hop_limit0 = ip0->hop_limit; @@ -2033,13 +2034,19 @@ ip6_rewrite_inline (vlib_main_t * vm, 0); } } - else + + if (is_midchain) { - p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; - } + vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , + 1 /* is_ip6 */ ); - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + /* Guess we are only writing on ip6 header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, + sizeof (ethernet_header_t)); /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; @@ -2054,14 +2061,19 @@ ip6_rewrite_inline (vlib_main_t * vm, } /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : - error0); + u16 ip0_len = + clib_net_to_host_u16 (ip0->payload_length) + + sizeof (ip6_header_t); + if (p0->flags & VNET_BUFFER_F_GSO) + ip0_len = gso_mtu_sz (p0); + + ip6_mtu_check (p0, ip0_len, + adj0[0].rewrite_header.max_l3_packet_bytes, + is_locally_originated0, &next0, is_midchain, + &error0); /* Don't adjust the buffer for hop count issue; icmp-error node - * wants to see the IP headerr */ + * wants to see the IP header */ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE)) { p0->current_data -= rw_len0; @@ -2074,22 +2086,30 @@ ip6_rewrite_inline (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next0, p0); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0, + adj0->ia_cfg_index); + } + else + { + p0->error = error_node->errors[error0]; } if (is_midchain) { - adj0->sub_type.midchain.fixup_func - (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + if (adj0->sub_type.midchain.fixup_func) + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); } if (is_mcast) { - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32[3], + (u8 *) ip0); } - p0->error = error_node->errors[error0]; - from += 1; n_left_from -= 1; to_next += 1; @@ -2110,9 +2130,29 @@ ip6_rewrite_inline (vlib_main_t * vm, return frame->n_vectors; } -static uword -ip6_rewrite (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +always_inline uword +ip6_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int do_counters, int is_midchain, int is_mcast) +{ + return ip6_rewrite_inline_with_gso (vm, node, frame, do_counters, + is_midchain, is_mcast); +} + +VLIB_NODE_FN (ip6_rewrite_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 0, 0); + else + return ip6_rewrite_inline (vm, node, frame, 0, 0, 0); +} + +VLIB_NODE_FN (ip6_rewrite_bcast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 0, 0); @@ -2120,9 +2160,9 @@ ip6_rewrite (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 0, 0); } -static uword -ip6_rewrite_mcast (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_rewrite_mcast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 0, 1); @@ -2130,9 +2170,9 @@ ip6_rewrite_mcast (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 0, 1); } -static uword -ip6_midchain (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_midchain_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 1, 0); @@ -2140,9 +2180,9 @@ ip6_midchain (vlib_main_t * vm, return ip6_rewrite_inline (vm, node, frame, 0, 1, 0); } -static uword -ip6_mcast_midchain (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { if (adj_are_counters_enabled ()) return ip6_rewrite_inline (vm, node, frame, 1, 1, 1); @@ -2153,64 +2193,59 @@ ip6_mcast_midchain (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_midchain_node) = { - .function = ip6_midchain, .name = "ip6-midchain", .vector_size = sizeof (u32), .format_trace = format_ip6_forward_next_trace, .sibling_of = "ip6-rewrite", }; -/* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain); - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_rewrite_node) = { - .function = ip6_rewrite, .name = "ip6-rewrite", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = IP6_REWRITE_N_NEXT, .next_nodes = { [IP6_REWRITE_NEXT_DROP] = "ip6-drop", [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_REWRITE_NEXT_FRAGMENT] = "ip6-frag", }, }; -/* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite); +VLIB_REGISTER_NODE (ip6_rewrite_bcast_node) = { + .name = "ip6-rewrite-bcast", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = { - .function = ip6_rewrite_mcast, .name = "ip6-rewrite-mcast", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, .sibling_of = "ip6-rewrite", }; -/* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) = +VLIB_REGISTER_NODE (ip6_mcast_midchain_node) = { - .function = ip6_mcast_midchain, .name = "ip6-mcast-midchain", .vector_size = sizeof (u32), .format_trace = format_ip6_rewrite_trace, .sibling_of = "ip6-rewrite", }; -/* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain); +/* *INDENT-ON* */ /* * Hop-by-Hop handling */ +#ifndef CLIB_MARCH_VARIANT ip6_hop_by_hop_main_t ip6_hop_by_hop_main; +#endif /* CLIB_MARCH_VARIANT */ #define foreach_ip6_hop_by_hop_error \ _(PROCESSED, "pkts with ip6 hop-by-hop options") \ @@ -2238,7 +2273,7 @@ typedef struct u8 option_data[256]; } ip6_hop_by_hop_trace_t; -vlib_node_registration_t ip6_hop_by_hop_node; +extern vlib_node_registration_t ip6_hop_by_hop_node; static char *ip6_hop_by_hop_error_strings[] = { #define _(sym,string) string, @@ -2246,6 +2281,7 @@ static char *ip6_hop_by_hop_error_strings[] = { #undef _ }; +#ifndef CLIB_MARCH_VARIANT u8 * format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args) { @@ -2289,6 +2325,7 @@ format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args) } return s; } +#endif static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args) @@ -2416,9 +2453,9 @@ ip6_scan_hbh_options (vlib_buffer_t * b0, /* * Process the Hop-by-Hop Options header */ -static uword -ip6_hop_by_hop (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index); @@ -2544,7 +2581,7 @@ ip6_hop_by_hop (vlib_main_t * vm, ARRAY_LEN (t->option_data) ? trace_len : ARRAY_LEN (t->option_data); t->trace_len = trace_len; - clib_memcpy (t->option_data, hbh0, trace_len); + clib_memcpy_fast (t->option_data, hbh0, trace_len); } if (b1->flags & VLIB_BUFFER_IS_TRACED) { @@ -2558,7 +2595,7 @@ ip6_hop_by_hop (vlib_main_t * vm, ARRAY_LEN (t->option_data) ? trace_len : ARRAY_LEN (t->option_data); t->trace_len = trace_len; - clib_memcpy (t->option_data, hbh1, trace_len); + clib_memcpy_fast (t->option_data, hbh1, trace_len); } } @@ -2639,7 +2676,7 @@ ip6_hop_by_hop (vlib_main_t * vm, ARRAY_LEN (t->option_data) ? trace_len : ARRAY_LEN (t->option_data); t->trace_len = trace_len; - clib_memcpy (t->option_data, hbh0, trace_len); + clib_memcpy_fast (t->option_data, hbh0, trace_len); } b0->error = error_node->errors[error0]; @@ -2656,7 +2693,6 @@ ip6_hop_by_hop (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = { - .function = ip6_hop_by_hop, .name = "ip6-hop-by-hop", .sibling_of = "ip6-lookup", .vector_size = sizeof (u32), @@ -2668,20 +2704,19 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop); - static clib_error_t * ip6_hop_by_hop_init (vlib_main_t * vm) { ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; - memset (hm->options, 0, sizeof (hm->options)); - memset (hm->trace, 0, sizeof (hm->trace)); + clib_memset (hm->options, 0, sizeof (hm->options)); + clib_memset (hm->trace, 0, sizeof (hm->trace)); hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP; return (0); } VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); +#ifndef CLIB_MARCH_VARIANT void ip6_hbh_set_next_override (uword next) { @@ -2699,7 +2734,7 @@ ip6_hbh_register_option (u8 option, ip6_main_t *im = &ip6_main; ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; - ASSERT (option < ARRAY_LEN (hm->options)); + ASSERT ((u32) option < ARRAY_LEN (hm->options)); /* Already registered */ if (hm->options[option]) @@ -2720,7 +2755,7 @@ ip6_hbh_unregister_option (u8 option) ip6_main_t *im = &ip6_main; ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; - ASSERT (option < ARRAY_LEN (hm->options)); + ASSERT ((u32) option < ARRAY_LEN (hm->options)); /* Not registered */ if (!hm->options[option]) @@ -2748,6 +2783,7 @@ ip6_hbh_unregister_option (u8 option) /* Global IP6 main. */ ip6_main_t ip6_main; +#endif static clib_error_t * ip6_lookup_init (vlib_main_t * vm) @@ -2776,21 +2812,6 @@ ip6_lookup_init (vlib_main_t * vm) ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1); - if (im->lookup_table_nbuckets == 0) - im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS; - - im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets); - - if (im->lookup_table_size == 0) - im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; - - BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash), - "ip6 FIB fwding table", - im->lookup_table_nbuckets, im->lookup_table_size); - BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, - "ip6 FIB non-fwding table", - im->lookup_table_nbuckets, im->lookup_table_size); - /* Create FIB with index 0 and table id of 0. */ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, FIB_SOURCE_DEFAULT_ROUTE); @@ -2806,124 +2827,11 @@ ip6_lookup_init (vlib_main_t * vm) /* Unless explicitly configured, don't process HBH options */ im->hbh_enabled = 0; - { - icmp6_neighbor_solicitation_header_t p; - - memset (&p, 0, sizeof (p)); - - p.ip.ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - p.ip.payload_length = - clib_host_to_net_u16 (sizeof (p) - - STRUCT_OFFSET_OF - (icmp6_neighbor_solicitation_header_t, neighbor)); - p.ip.protocol = IP_PROTOCOL_ICMP6; - p.ip.hop_limit = 255; - ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0); - - p.neighbor.icmp.type = ICMP6_neighbor_solicitation; - - p.link_layer_option.header.type = - ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; - p.link_layer_option.header.n_data_u64s = - sizeof (p.link_layer_option) / sizeof (u64); - - vlib_packet_template_init (vm, - &im->discover_neighbor_packet_template, - &p, sizeof (p), - /* alloc chunk size */ 8, - "ip6 neighbor discovery"); - } - return error; } VLIB_INIT_FUNCTION (ip6_lookup_init); -void -ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, - u8 * mac) -{ - ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); - /* Invert the "u" bit */ - ip->as_u8[8] = mac[0] ^ (1 << 1); - ip->as_u8[9] = mac[1]; - ip->as_u8[10] = mac[2]; - ip->as_u8[11] = 0xFF; - ip->as_u8[12] = 0xFE; - ip->as_u8[13] = mac[3]; - ip->as_u8[14] = mac[4]; - ip->as_u8[15] = mac[5]; -} - -void -ip6_ethernet_mac_address_from_link_local_address (u8 * mac, - ip6_address_t * ip) -{ - /* Invert the previously inverted "u" bit */ - mac[0] = ip->as_u8[8] ^ (1 << 1); - mac[1] = ip->as_u8[9]; - mac[2] = ip->as_u8[10]; - mac[3] = ip->as_u8[13]; - mac[4] = ip->as_u8[14]; - mac[5] = ip->as_u8[15]; -} - -static clib_error_t * -test_ip6_link_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - u8 mac[6]; - ip6_address_t _a, *a = &_a; - - if (unformat (input, "%U", unformat_ethernet_address, mac)) - { - ip6_link_local_address_from_ethernet_mac_address (a, mac); - vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a); - ip6_ethernet_mac_address_from_link_local_address (mac, a); - vlib_cli_output (vm, "Original MAC address: %U", - format_ethernet_address, mac); - } - - return 0; -} - -/*? - * This command converts the given MAC Address into an IPv6 link-local - * address. - * - * @cliexpar - * Example of how to create an IPv6 link-local address: - * @cliexstart{test ip6 link 16:d9:e0:91:79:86} - * Link local address: fe80::14d9:e0ff:fe91:7986 - * Original MAC address: 16:d9:e0:91:79:86 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_link_command, static) = -{ - .path = "test ip6 link", - .function = test_ip6_link_command_fn, - .short_help = "test ip6 link ", -}; -/* *INDENT-ON* */ - -int -vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) -{ - u32 fib_index; - - fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id); - - if (~0 == fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6, - flow_hash_config); - - return 0; -} - static clib_error_t * set_ip6_flow_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2938,8 +2846,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, { if (unformat (input, "table %d", &table_id)) matched = 1; -#define _(a,v) \ - else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} +#define _(a, b, v) \ + else if (unformat (input, #a)) \ + { \ + flow_hash_config |= v; \ + matched = 1; \ + } foreach_flow_hash_bit #undef _ else @@ -2950,7 +2862,7 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); + rv = ip_flow_hash_set (AF_IP6, table_id, flow_hash_config); switch (rv) { case 0: @@ -3043,11 +2955,10 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, * @endparblock ?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = -{ +VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = { .path = "set ip6 flow-hash", - .short_help = - "set ip6 flow-hash table [src] [dst] [sport] [dport] [proto] [reverse]", + .short_help = "set ip6 flow-hash table [src] [dst] [sport] " + "[dport] [proto] [reverse] [flowlabel]", .function = set_ip6_flow_hash_command_fn, }; /* *INDENT-ON* */ @@ -3100,6 +3011,7 @@ VLIB_CLI_COMMAND (show_ip6_local, static) = }; /* *INDENT-ON* */ +#ifndef CLIB_MARCH_VARIANT int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index) @@ -3159,6 +3071,7 @@ vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, return 0; } +#endif static clib_error_t * set_ip6_classify_command_fn (vlib_main_t * vm, @@ -3223,34 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) = }; /* *INDENT-ON* */ -static clib_error_t * -ip6_config (vlib_main_t * vm, unformat_input_t * input) -{ - ip6_main_t *im = &ip6_main; - uword heapsize = 0; - u32 tmp; - u32 nbuckets = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "hash-buckets %d", &tmp)) - nbuckets = tmp; - else if (unformat (input, "heap-size %U", - unformat_memory_size, &heapsize)) - ; - else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); - } - - im->lookup_table_nbuckets = nbuckets; - im->lookup_table_size = heapsize; - - return 0; -} - -VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6"); - /* * fd.io coding-style-patch-verification: ON *