Imported Upstream version 16.04
[deb_dpdk.git] / lib / librte_acl / acl_gen.c
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c
new file mode 100644 (file)
index 0000000..ea557ab
--- /dev/null
@@ -0,0 +1,561 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl.h"
+
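+/*
+ * QRANGE transition tables are laid out in signed-byte order
+ * (0x80..0xff followed by 0x00..0x7f), so 0x80 is the first value
+ * (see the layout note in acl_add_ptrs()).
+ */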
+#define        QRANGE_MIN      ((uint8_t)INT8_MIN)
+
+#define        RTE_ACL_VERIFY(exp)     do {                                          \
+       if (!(exp))                                                           \
+               rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
+struct acl_node_counters {
+       int32_t match;
+       int32_t match_used;
+       int32_t single;
+       int32_t quad;
+       int32_t quad_vectors;
+       int32_t dfa;
+       int32_t dfa_gr64;
+};
+
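+/*
+ * Running offsets into the runtime node array, one per node class.
+ * Each index is advanced as nodes of that class are laid out, so it
+ * always points at the next free slot for that class (see acl_gen_node()).
+ */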
+struct rte_acl_indices {
+       int32_t dfa_index;
+       int32_t quad_index;
+       int32_t single_index;
+       int32_t match_index;
+       int32_t match_start;
+};
+
+static void
+acl_gen_log_stats(const struct rte_acl_ctx *ctx,
+       const struct acl_node_counters *counts,
+       const struct rte_acl_indices *indices,
+       size_t max_size)
+{
+       RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
+               "runtime memory footprint on socket %d:\n"
+               "single nodes/bytes used: %d/%zu\n"
+               "quad nodes/vectors/bytes used: %d/%d/%zu\n"
+               "DFA nodes/group64/bytes used: %d/%d/%zu\n"
+               "match nodes/bytes used: %d/%zu\n"
+               "total: %zu bytes\n"
+               "max limit: %zu bytes\n",
+               ctx->name, ctx->socket_id,
+               counts->single, counts->single * sizeof(uint64_t),
+               counts->quad, counts->quad_vectors,
+               (indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
+               counts->dfa, counts->dfa_gr64,
+               indices->dfa_index * sizeof(uint64_t),
+               counts->match,
+               counts->match * sizeof(struct rte_acl_match_results),
+               ctx->mem_sz,
+               max_size);
+}
+
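+/*
+ * Encode the 64-bit index word for a DFA node.
+ * A DFA node's 256-entry transition table is split into 64-entry groups;
+ * identical groups are stored only once (see acl_dfa_count_gr64()).
+ * The lower 32 bits hold the array index of the node's first stored group
+ * OR'ed with the node type; the upper 32 bits pack each logical group's
+ * displacement (i - dfa_gr64[i]) shifted by 6 + RTE_ACL_DFA_GR64_BIT * i,
+ * so a lookup can recover the stored group for any input byte.
+ */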
+static uint64_t
+acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
+{
+       uint64_t idx;
+       uint32_t i;
+
+       idx = 0;
+       for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
+               RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
+               RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
+               idx |= (i - node->dfa_gr64[i]) <<
+                       (6 + RTE_ACL_DFA_GR64_BIT * i);
+       }
+
+       return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
+}
+
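+/*
+ * Scatter a full 256-entry transition table into the node's deduplicated
+ * storage: logical group i is written to stored group dfa_gr64[i], so
+ * identical groups end up sharing a single copy.
+ */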
+static void
+acl_dfa_fill_gr64(const struct rte_acl_node *node,
+       const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
+{
+       uint32_t i;
+
+       for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
+               memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
+                       src + i * RTE_ACL_DFA_GR64_SIZE,
+                       RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
+       }
+}
+
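+/*
+ * Deduplicate the 64-entry groups of a 256-entry DFA transition table.
+ * Fills gr64[] with the stored-group number for each logical group and
+ * returns the number of distinct groups (used as the DFA node's fanout).
+ */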
+static uint32_t
+acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
+       uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
+{
+       uint32_t i, j, k;
+
+       k = 0;
+       for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
+               gr64[i] = i;
+               for (j = 0; j != i; j++) {
+                       if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
+                                       array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
+                                       RTE_ACL_DFA_GR64_SIZE *
+                                       sizeof(array_ptr[0])) == 0)
+                               break;
+               }
+               gr64[i] = (j != i) ? gr64[j] : k++;
+       }
+
+       return k;
+}
+
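+/*
+ * Expand a node's child pointers into a full 256-entry transition table.
+ * Entries not covered by any child keep the no_match value; covered
+ * entries get the child's final node_index when resolved is set, or the
+ * child's ordinal in node->ptrs[] otherwise. Returns a count of the
+ * ranges of consecutive input bytes that map to some child.
+ */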
+static uint32_t
+acl_node_fill_dfa(const struct rte_acl_node *node,
+       uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
+{
+       uint32_t n, x;
+       uint32_t ranges, last_bit;
+       struct rte_acl_node *child;
+       struct rte_acl_bitset *bits;
+
+       ranges = 0;
+       last_bit = 0;
+
+       for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+               dfa[n] = no_match;
+
+       for (x = 0; x < node->num_ptrs; x++) {
+
+               child = node->ptrs[x].ptr;
+               if (child == NULL)
+                       continue;
+
+               bits = &node->ptrs[x].values;
+               for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {
+
+                       if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
+                               (1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {
+
+                               dfa[n] = resolved ? child->node_index : x;
+                               ranges += (last_bit == 0);
+                               last_bit = 1;
+                       } else {
+                               last_bit = 0;
+                       }
+               }
+       }
+
+       return ranges;
+}
+
+/*
+ * Count the number of groups of sequential bits that are either 0 or 1,
+ * as specified by the zero_one parameter. This is used to calculate the
+ * number of ranges in a node, to see whether it fits in a quad range node.
+ */
+static int
+acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
+{
+       int n, ranges, last_bit;
+
+       ranges = 0;
+       last_bit = zero_one ^ 1;
+
+       for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
+               if (bits->bits[n / (sizeof(bits_t) * 8)] &
+                               (1 << (n % (sizeof(bits_t) * 8)))) {
+                       if (zero_one == 1 && last_bit != 1)
+                               ranges++;
+                       last_bit = 1;
+               } else {
+                       if (zero_one == 0 && last_bit != 0)
+                               ranges++;
+                       last_bit = 0;
+               }
+       }
+       for (n = 0; n < QRANGE_MIN; n++) {
+               if (bits->bits[n / (sizeof(bits_t) * 8)] &
+                               (1 << (n % (sizeof(bits_t) * 8)))) {
+                       if (zero_one == 1 && last_bit != 1)
+                               ranges++;
+                       last_bit = 1;
+               } else {
+                       if (zero_one == 0 && last_bit != 0)
+                               ranges++;
+                       last_bit = 0;
+               }
+       }
+
+       return ranges;
+}
+
+/*
+ * Count number of ranges spanned by the node's pointers
+ */
+static int
+acl_count_fanout(struct rte_acl_node *node)
+{
+       uint32_t n;
+       int ranges;
+
+       if (node->fanout != 0)
+               return node->fanout;
+
+       ranges = acl_count_sequential_groups(&node->values, 0);
+
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       ranges += acl_count_sequential_groups(
+                               &node->ptrs[n].values, 1);
+       }
+
+       node->fanout = ranges;
+       return node->fanout;
+}
+
+/*
+ * Determine the type of nodes and count each type
+ */
+static void
+acl_count_trie_types(struct acl_node_counters *counts,
+       struct rte_acl_node *node, uint64_t no_match, int force_dfa)
+{
+       uint32_t n;
+       int num_ptrs;
+       uint64_t dfa[RTE_ACL_DFA_SIZE];
+
+       /* skip if this node has been counted */
+       if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
+               return;
+
+       if (node->match_flag != 0 || node->num_ptrs == 0) {
+               counts->match++;
+               node->node_type = RTE_ACL_NODE_MATCH;
+               return;
+       }
+
+       num_ptrs = acl_count_fanout(node);
+
+       /* Force type to dfa */
+       if (force_dfa)
+               num_ptrs = RTE_ACL_DFA_SIZE;
+
+       /* determine node type based on number of ranges */
+       if (num_ptrs == 1) {
+               counts->single++;
+               node->node_type = RTE_ACL_NODE_SINGLE;
+       } else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
+               counts->quad++;
+               counts->quad_vectors += node->fanout;
+               node->node_type = RTE_ACL_NODE_QRANGE;
+       } else {
+               counts->dfa++;
+               node->node_type = RTE_ACL_NODE_DFA;
+               if (force_dfa != 0) {
+                       /* always expand to a max number of nodes. */
+                       for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
+                               node->dfa_gr64[n] = n;
+                       node->fanout = n;
+               } else {
+                       acl_node_fill_dfa(node, dfa, no_match, 0);
+                       node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
+               }
+               counts->dfa_gr64 += node->fanout;
+       }
+
+       /*
+        * recursively count the types of all children
+        */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       acl_count_trie_types(counts, node->ptrs[n].ptr,
+                               no_match, 0);
+       }
+}
+
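+/*
+ * Write a node's transition entries into node_array.
+ * For QRANGE nodes the 256-entry table is compressed into one 64-bit
+ * transition per range, with the range boundary bytes recorded in
+ * node->transitions[] (in the signed-byte order described below).
+ * For DFA nodes, on the resolved pass, the deduplicated group64 blocks
+ * are copied into place.
+ */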
+static void
+acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
+       int resolved)
+{
+       uint32_t x;
+       int32_t m;
+       uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];
+
+       acl_node_fill_dfa(node, dfa, no_match, resolved);
+
+       /*
+        * Rather than going from 0 to 256, the range count and
+        * the layout are from 80-ff then 0-7f due to signed compare
+        * for SSE (cmpgt).
+        */
+       if (node->node_type == RTE_ACL_NODE_QRANGE) {
+
+               m = 0;
+               node_a = node_array;
+               index = dfa[QRANGE_MIN];
+               *node_a++ = index;
+
+               for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
+                       if (dfa[x] != index) {
+                               index = dfa[x];
+                               *node_a++ = index;
+                               node->transitions[m++] = (uint8_t)(x - 1);
+                       }
+               }
+
+               for (x = 0; x < INT8_MAX + 1; x++) {
+                       if (dfa[x] != index) {
+                               index = dfa[x];
+                               *node_a++ = index;
+                               node->transitions[m++] = (uint8_t)(x - 1);
+                       }
+               }
+
+               /* fill unused locations with max value - nothing is greater */
+               for (; m < RTE_ACL_QUAD_SIZE; m++)
+                       node->transitions[m] = INT8_MAX;
+
+               RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);
+
+       } else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
+               acl_dfa_fill_gr64(node, dfa, node_array);
+       }
+}
+
+/*
+ * Allocate space for this node and recursively allocate space for each
+ * child. Once all the children are allocated, resolve all transitions
+ * for this node.
+ */
+static void
+acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
+       uint64_t no_match, struct rte_acl_indices *index, int num_categories)
+{
+       uint32_t n, sz, *qtrp;
+       uint64_t *array_ptr;
+       struct rte_acl_match_results *match;
+
+       if (node->node_index != RTE_ACL_NODE_UNDEFINED)
+               return;
+
+       array_ptr = NULL;
+
+       switch (node->node_type) {
+       case RTE_ACL_NODE_DFA:
+               array_ptr = &node_array[index->dfa_index];
+               node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
+               sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
+               index->dfa_index += sz;
+               for (n = 0; n < sz; n++)
+                       array_ptr[n] = no_match;
+               break;
+       case RTE_ACL_NODE_SINGLE:
+               node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
+                       node->node_type;
+               array_ptr = &node_array[index->single_index];
+               index->single_index += 1;
+               array_ptr[0] = no_match;
+               break;
+       case RTE_ACL_NODE_QRANGE:
+               array_ptr = &node_array[index->quad_index];
+               acl_add_ptrs(node, array_ptr, no_match, 0);
+               qtrp = (uint32_t *)node->transitions;
+               node->node_index = qtrp[0];
+               node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
+               node->node_index |= index->quad_index | node->node_type;
+               index->quad_index += node->fanout;
+               break;
+       case RTE_ACL_NODE_MATCH:
+               match = ((struct rte_acl_match_results *)
+                       (node_array + index->match_start));
+               for (n = 0; n != RTE_DIM(match->results); n++)
+                       RTE_ACL_VERIFY(match->results[n] == 0);
+               memcpy(match + index->match_index, node->mrt,
+                       sizeof(*node->mrt));
+               node->node_index = index->match_index | node->node_type;
+               index->match_index += 1;
+               break;
+       case RTE_ACL_NODE_UNDEFINED:
+               RTE_ACL_VERIFY(node->node_type !=
+                       (uint32_t)RTE_ACL_NODE_UNDEFINED);
+               break;
+       }
+
+       /* recursively allocate space for all children */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       acl_gen_node(node->ptrs[n].ptr,
+                               node_array,
+                               no_match,
+                               index,
+                               num_categories);
+       }
+
+       /* All children are resolved, resolve this node's pointers */
+       switch (node->node_type) {
+       case RTE_ACL_NODE_DFA:
+               acl_add_ptrs(node, array_ptr, no_match, 1);
+               break;
+       case RTE_ACL_NODE_SINGLE:
+               for (n = 0; n < node->num_ptrs; n++) {
+                       if (node->ptrs[n].ptr != NULL)
+                               array_ptr[0] = node->ptrs[n].ptr->node_index;
+               }
+               break;
+       case RTE_ACL_NODE_QRANGE:
+               acl_add_ptrs(node, array_ptr, no_match, 1);
+               break;
+       case RTE_ACL_NODE_MATCH:
+               break;
+       case RTE_ACL_NODE_UNDEFINED:
+               RTE_ACL_VERIFY(node->node_type !=
+                       (uint32_t)RTE_ACL_NODE_UNDEFINED);
+               break;
+       }
+}
+
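+/*
+ * Compute per-type start offsets into the runtime node array.
+ * Layout: entries [0, RTE_ACL_DFA_SIZE) form the shared no-match block,
+ * entry RTE_ACL_DFA_SIZE is the idle/NOMATCH single node, followed by all
+ * DFA group64 blocks, then quad vectors, then single nodes, and finally
+ * the XMM-aligned match results (entry 0 is reserved for the NOMATCH
+ * result); see rte_acl_gen() below.
+ */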
+static void
+acl_calc_counts_indices(struct acl_node_counters *counts,
+       struct rte_acl_indices *indices,
+       struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+       uint64_t no_match)
+{
+       uint32_t n;
+
+       memset(indices, 0, sizeof(*indices));
+       memset(counts, 0, sizeof(*counts));
+
+       /* Get stats on nodes */
+       for (n = 0; n < num_tries; n++) {
+               acl_count_trie_types(counts, node_bld_trie[n].trie,
+                       no_match, 1);
+       }
+
+       indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
+       indices->quad_index = indices->dfa_index +
+               counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
+       indices->single_index = indices->quad_index + counts->quad_vectors;
+       indices->match_start = indices->single_index + counts->single + 1;
+       indices->match_start = RTE_ALIGN(indices->match_start,
+               (XMM_SIZE / sizeof(uint64_t)));
+       indices->match_index = 1;
+}
+
+/*
+ * Generate the runtime structure from the build-phase trie structures.
+ */
+int
+rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+       struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+       uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
+{
+       void *mem;
+       size_t total_size;
+       uint64_t *node_array, no_match;
+       uint32_t n, match_index;
+       struct rte_acl_match_results *match;
+       struct acl_node_counters counts;
+       struct rte_acl_indices indices;
+
+       no_match = RTE_ACL_NODE_MATCH;
+
+       /* Fill counts and indices arrays from the nodes. */
+       acl_calc_counts_indices(&counts, &indices,
+               node_bld_trie, num_tries, no_match);
+
+       /* Allocate runtime memory (align to cache boundary) */
+       total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
+               indices.match_start * sizeof(uint64_t) +
+               (counts.match + 1) * sizeof(struct rte_acl_match_results) +
+               XMM_SIZE;
+
+       if (total_size > max_size) {
+               RTE_LOG(DEBUG, ACL,
+                       "Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
+                       "bytes required: %zu, allowed: %zu\n",
+                       ctx->name, total_size, max_size);
+               return -ERANGE;
+       }
+
+       mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
+                       ctx->socket_id);
+       if (mem == NULL) {
+               RTE_LOG(ERR, ACL,
+                       "allocation of %zu bytes on socket %d for %s failed\n",
+                       total_size, ctx->socket_id, ctx->name);
+               return -ENOMEM;
+       }
+
+       /* Fill the runtime structure */
+       match_index = indices.match_start;
+       node_array = (uint64_t *)((uintptr_t)mem +
+               RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));
+
+       /*
+        * Set up the NOMATCH (idle) node: a SINGLE node at index
+        * RTE_ACL_DFA_SIZE that points to itself.
+        */
+
+       node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;
+
+       for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+               node_array[n] = no_match;
+
+       /* NOMATCH result at index 0 */
+       match = ((struct rte_acl_match_results *)(node_array + match_index));
+       memset(match, 0, sizeof(*match));
+
+       for (n = 0; n < num_tries; n++) {
+
+               acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
+                       &indices, num_categories);
+
+               if (node_bld_trie[n].trie->node_index == no_match)
+                       trie[n].root_index = 0;
+               else
+                       trie[n].root_index = node_bld_trie[n].trie->node_index;
+       }
+
+       ctx->mem = mem;
+       ctx->mem_sz = total_size;
+       ctx->data_indexes = mem;
+       ctx->num_tries = num_tries;
+       ctx->num_categories = num_categories;
+       ctx->match_index = match_index;
+       ctx->no_match = no_match;
+       ctx->idle = node_array[RTE_ACL_DFA_SIZE];
+       ctx->trans_table = node_array;
+       memcpy(ctx->trie, trie, sizeof(ctx->trie));
+
+       acl_gen_log_stats(ctx, &counts, &indices, max_size);
+       return 0;
+}