debian/patches/dpdk-dev-ppc-enable-2-7-acl-add-AltiVec-for-ppc64.patch

   1 From: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
   2 Date: Thu, 8 Sep 2016 22:18:04 +0530
   3 Subject: [PATCH 2/7] acl: add AltiVec for ppc64
   4
   5 This patch adds a port of the ACL library for ppc64le.
   6
   7 Signed-off-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
   8 Acked-by: Chao Zhu <chaozhu@linux.vnet.ibm.com>
   9 Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
  10
  11 Origin: Upstream, commit:1d73135f9f1c626def280bd9c7e06a9ae157f660
  12 Author: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>
  13 Last-Update: 2016-09-21
  14 ---
  15  app/test-acl/main.c                         |   4 +
  16  config/defconfig_ppc_64-power8-linuxapp-gcc |   1 -
  17  lib/librte_acl/Makefile                     |   2 +
  18  lib/librte_acl/acl.h                        |   4 +
  19  lib/librte_acl/acl_run.h                    |   2 +
  20  lib/librte_acl/acl_run_altivec.c            |  47 ++++
  21  lib/librte_acl/acl_run_altivec.h            | 329 ++++++++++++++++++++++++++++
  22  lib/librte_acl/rte_acl.c                    |  13 ++
  23  lib/librte_acl/rte_acl.h                    |   1 +
  24  9 files changed, 402 insertions(+), 1 deletion(-)
  25  create mode 100644 lib/librte_acl/acl_run_altivec.c
  26  create mode 100644 lib/librte_acl/acl_run_altivec.h
  27
  28 diff --git a/app/test-acl/main.c b/app/test-acl/main.c
  29 index d366981..1b2b176 100644
  30 --- a/app/test-acl/main.c
  31 +++ b/app/test-acl/main.c
  32 @@ -105,6 +105,10 @@ static const struct acl_alg acl_alg[] = {
  33                 .name = "neon",
  34                 .alg = RTE_ACL_CLASSIFY_NEON,
  35         },
  36 +       {
  37 +               .name = "altivec",
  38 +               .alg = RTE_ACL_CLASSIFY_ALTIVEC,
  39 +       },
  40  };
  41
  42  static struct {
  43 diff --git a/config/defconfig_ppc_64-power8-linuxapp-gcc b/config/defconfig_ppc_64-power8-linuxapp-gcc
  44 index 9ddf3c5..dede34f 100644
  45 --- a/config/defconfig_ppc_64-power8-linuxapp-gcc
  46 +++ b/config/defconfig_ppc_64-power8-linuxapp-gcc
  47 @@ -57,7 +57,6 @@ CONFIG_RTE_LIBRTE_ENIC_PMD=n
  48  CONFIG_RTE_LIBRTE_FM10K_PMD=n
  49
  50  # This following libraries are not available on Power. So they're turned off.
  51 -CONFIG_RTE_LIBRTE_ACL=n
  52  CONFIG_RTE_LIBRTE_SCHED=n
  53  CONFIG_RTE_LIBRTE_PORT=n
  54  CONFIG_RTE_LIBRTE_TABLE=n
  55 diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
  56 index 9803e9d..d05be66 100644
  57 --- a/lib/librte_acl/Makefile
  58 +++ b/lib/librte_acl/Makefile
  59 @@ -52,6 +52,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c
  60  ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
  61  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c
  62  CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized
  63 +else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y)
  64 +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_altivec.c
  65  else
  66  SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c
  67  #check if flag for SSE4.1 is already on, if not set it up manually
  68 diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
  69 index 09d6784..6664a55 100644
  70 --- a/lib/librte_acl/acl.h
  71 +++ b/lib/librte_acl/acl.h
  72 @@ -234,6 +234,10 @@ int
  73  rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
  74         uint32_t *results, uint32_t num, uint32_t categories);
  75
  76 +int
  77 +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
  78 +       uint32_t *results, uint32_t num, uint32_t categories);
  79 +
  80  #ifdef __cplusplus
  81  }
  82  #endif /* __cplusplus */
  83 diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h
  84 index b2fc42c..024f393 100644
  85 --- a/lib/librte_acl/acl_run.h
  86 +++ b/lib/librte_acl/acl_run.h
  87 @@ -39,7 +39,9 @@
  88
  89  #define MAX_SEARCHES_AVX16     16
  90  #define MAX_SEARCHES_SSE8      8
  91 +#define MAX_SEARCHES_ALTIVEC8  8
  92  #define MAX_SEARCHES_SSE4      4
  93 +#define MAX_SEARCHES_ALTIVEC4  4
  94  #define MAX_SEARCHES_SCALAR    2
  95
  96  #define GET_NEXT_4BYTES(prm, idx)      \
  97 diff --git a/lib/librte_acl/acl_run_altivec.c b/lib/librte_acl/acl_run_altivec.c
  98 new file mode 100644
  99 index 0000000..3523526
 100 --- /dev/null
 101 +++ b/lib/librte_acl/acl_run_altivec.c
 102 @@ -0,0 +1,47 @@
 103 +/*-
 104 + *   BSD LICENSE
 105 + *
 106 + *   Copyright (C) IBM Corporation 2016.
 107 + *   All rights reserved.
 108 + *
 109 + *   Redistribution and use in source and binary forms, with or without
 110 + *   modification, are permitted provided that the following conditions
 111 + *   are met:
 112 + *
 113 + *     * Redistributions of source code must retain the above copyright
 114 + *       notice, this list of conditions and the following disclaimer.
 115 + *     * Redistributions in binary form must reproduce the above copyright
 116 + *       notice, this list of conditions and the following disclaimer in
 117 + *       the documentation and/or other materials provided with the
 118 + *       distribution.
 119 + *     * Neither the name of Intel Corporation nor the names of its
 120 + *       contributors may be used to endorse or promote products derived
 121 + *       from this software without specific prior written permission.
 122 + *
 123 + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 124 + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 125 + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 126 + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 127 + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 128 + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 129 + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 130 + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 131 + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 132 + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 133 + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 134 + */
 135 +
 136 +#include "acl_run_altivec.h"
 137 +
 138 +int
 139 +rte_acl_classify_altivec(const struct rte_acl_ctx *ctx, const uint8_t **data,
 140 +       uint32_t *results, uint32_t num, uint32_t categories)
 141 +{
 142 +       if (likely(num >= MAX_SEARCHES_ALTIVEC8))
 143 +               return search_altivec_8(ctx, data, results, num, categories);
 144 +       else if (num >= MAX_SEARCHES_ALTIVEC4)
 145 +               return search_altivec_4(ctx, data, results, num, categories);
 146 +       else
 147 +               return rte_acl_classify_scalar(ctx, data, results, num,
 148 +                       categories);
 149 +}
 150 diff --git a/lib/librte_acl/acl_run_altivec.h b/lib/librte_acl/acl_run_altivec.h
 151 new file mode 100644
 152 index 0000000..7d329bc
 153 --- /dev/null
 154 +++ b/lib/librte_acl/acl_run_altivec.h
 155 @@ -0,0 +1,329 @@
 156 +/*
 157 + *   BSD LICENSE
 158 + *
 159 + *   Copyright (C) IBM Corporation 2016.
 160 + *
 161 + *   Redistribution and use in source and binary forms, with or without
 162 + *   modification, are permitted provided that the following conditions
 163 + *   are met:
 164 + *
 165 + *     * Redistributions of source code must retain the above copyright
 166 + *       notice, this list of conditions and the following disclaimer.
 167 + *     * Redistributions in binary form must reproduce the above copyright
 168 + *       notice, this list of conditions and the following disclaimer in
 169 + *       the documentation and/or other materials provided with the
 170 + *       distribution.
 171 + *     * Neither the name of IBM Corporation nor the names of its
 172 + *       contributors may be used to endorse or promote products derived
 173 + *       from this software without specific prior written permission.
 174 + *
 175 + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 176 + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 177 + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 178 + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 179 + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 180 + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 181 + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 182 + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 183 + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 184 + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 185 + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 186 +*/
 187 +
 188 +#include "acl_run.h"
 189 +#include "acl_vect.h"
 190 +
 191 +struct _altivec_acl_const {
 192 +       rte_xmm_t xmm_shuffle_input;
 193 +       rte_xmm_t xmm_index_mask;
 194 +       rte_xmm_t xmm_ones_16;
 195 +       rte_xmm_t range_base;
 196 +} altivec_acl_const  __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
 197 +       {
 198 +               .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
 199 +       },
 200 +       {
 201 +               .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
 202 +               RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
 203 +       },
 204 +       {
 205 +               .u16 = {1, 1, 1, 1, 1, 1, 1, 1}
 206 +       },
 207 +       {
 208 +               .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
 209 +       },
 210 +};
 211 +
 212 +/*
 213 + * Resolve priority for multiple results (altivec version).
 214 + * This consists of comparing the priority of the current traversal with the
 215 + * running set of results for the packet.
 216 + * For each result, keep a running array of the result (rule number) and
 217 + * its priority for each category.
 218 + */
 219 +static inline void
 220 +resolve_priority_altivec(uint64_t transition, int n,
 221 +       const struct rte_acl_ctx *ctx, struct parms *parms,
 222 +       const struct rte_acl_match_results *p, uint32_t categories)
 223 +{
 224 +       uint32_t x;
 225 +       xmm_t results, priority, results1, priority1;
 226 +       vector bool int selector;
 227 +       xmm_t *saved_results, *saved_priority;
 228 +
 229 +       for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
 230 +
 231 +               saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
 232 +               saved_priority =
 233 +                       (xmm_t *)(&parms[n].cmplt->priority[x]);
 234 +
 235 +               /* get results and priorities for completed trie */
 236 +               results = *(const xmm_t *)&p[transition].results[x];
 237 +               priority = *(const xmm_t *)&p[transition].priority[x];
 238 +
 239 +               /* if this is not the first completed trie */
 240 +               if (parms[n].cmplt->count != ctx->num_tries) {
 241 +
 242 +                       /* get running best results and their priorities */
 243 +                       results1 = *saved_results;
 244 +                       priority1 = *saved_priority;
 245 +
 246 +                       /* select results that are highest priority */
 247 +                       selector = vec_cmpgt(priority1, priority);
 248 +                       results = vec_sel(results, results1, selector);
 249 +                       priority = vec_sel(priority, priority1,
 250 +                               selector);
 251 +               }
 252 +
 253 +               /* save running best results and their priorities */
 254 +               *saved_results = results;
 255 +               *saved_priority = priority;
 256 +       }
 257 +}
 258 +
 259 +/*
 260 + * Check for any match in 4 transitions
 261 + */
 262 +static inline __attribute__((always_inline)) uint32_t
 263 +check_any_match_x4(uint64_t val[])
 264 +{
 265 +       return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH;
 266 +}
 267 +
 268 +static inline __attribute__((always_inline)) void
 269 +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
 270 +       struct acl_flow_data *flows, uint64_t transitions[])
 271 +{
 272 +       while (check_any_match_x4(transitions)) {
 273 +               transitions[0] = acl_match_check(transitions[0], slot, ctx,
 274 +                       parms, flows, resolve_priority_altivec);
 275 +               transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
 276 +                       parms, flows, resolve_priority_altivec);
 277 +               transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
 278 +                       parms, flows, resolve_priority_altivec);
 279 +               transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
 280 +                       parms, flows, resolve_priority_altivec);
 281 +       }
 282 +}
 283 +
 284 +/*
 285 + * Process 4 transitions (in 2 XMM registers) in parallel
 286 + */
 287 +static inline __attribute__((optimize("O2"))) xmm_t
 288 +transition4(xmm_t next_input, const uint64_t *trans,
 289 +       xmm_t *indices1, xmm_t *indices2)
 290 +{
 291 +       xmm_t addr, tr_lo, tr_hi;
 292 +       xmm_t in, node_type, r, t;
 293 +       xmm_t dfa_ofs, quad_ofs;
 294 +       xmm_t *index_mask, *tp;
 295 +       vector bool int dfa_msk;
 296 +       vector signed char zeroes = {};
 297 +       union {
 298 +               uint64_t d64[2];
 299 +               uint32_t d32[4];
 300 +       } v;
 301 +
 302 +       /* Move low 32 into tr_lo and high 32 into tr_hi */
 303 +       tr_lo = (xmm_t){(*indices1)[0], (*indices1)[2],
 304 +                       (*indices2)[0], (*indices2)[2]};
 305 +       tr_hi = (xmm_t){(*indices1)[1], (*indices1)[3],
 306 +                       (*indices2)[1], (*indices2)[3]};
 307 +
 308 +        /* Calculate the address (array index) for all 4 transitions. */
 309 +       index_mask = (xmm_t *)&altivec_acl_const.xmm_index_mask.u32;
 310 +       t = vec_xor(*index_mask, *index_mask);
 311 +       in = vec_perm(next_input, (xmm_t){},
 312 +               *(vector unsigned char *)&altivec_acl_const.xmm_shuffle_input);
 313 +
 314 +       /* Calc node type and node addr */
 315 +       node_type = vec_and(vec_nor(*index_mask, *index_mask), tr_lo);
 316 +       addr = vec_and(tr_lo, *index_mask);
 317 +
 318 +       /* mask for DFA type(0) nodes */
 319 +       dfa_msk = vec_cmpeq(node_type, t);
 320 +
 321 +       /* DFA calculations. */
 322 +       r = vec_sr(in, (vector unsigned int){30, 30, 30, 30});
 323 +       tp = (xmm_t *)&altivec_acl_const.range_base.u32;
 324 +       r = vec_add(r, *tp);
 325 +       t = vec_sr(in, (vector unsigned int){24, 24, 24, 24});
 326 +       r = vec_perm(tr_hi, (xmm_t){(uint16_t)0 << 16},
 327 +               (vector unsigned char)r);
 328 +
 329 +       dfa_ofs = vec_sub(t, r);
 330 +
 331 +       /* QUAD/SINGLE calculations. */
 332 +       t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi);
 333 +       t = (xmm_t)vec_sel(
 334 +               vec_sel(
 335 +                       (vector signed char)vec_sub(
 336 +                               zeroes, (vector signed char)t),
 337 +                       (vector signed char)t,
 338 +                       vec_cmpgt((vector signed char)t, zeroes)),
 339 +               zeroes,
 340 +               vec_cmpeq((vector signed char)t, zeroes));
 341 +
 342 +       t = (xmm_t)vec_msum((vector signed char)t,
 343 +               (vector unsigned char)t, (xmm_t){});
 344 +       quad_ofs = (xmm_t)vec_msum((vector signed short)t,
 345 +               *(vector signed short *)&altivec_acl_const.xmm_ones_16.u16,
 346 +               (xmm_t){});
 347 +
 348 +       /* blend DFA and QUAD/SINGLE. */
 349 +       t = vec_sel(quad_ofs, dfa_ofs, dfa_msk);
 350 +
 351 +       /* calculate address for next transitions. */
 352 +       addr = vec_add(addr, t);
 353 +
 354 +       v.d64[0] = (uint64_t)trans[addr[0]];
 355 +       v.d64[1] = (uint64_t)trans[addr[1]];
 356 +       *indices1 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
 357 +       v.d64[0] = (uint64_t)trans[addr[2]];
 358 +       v.d64[1] = (uint64_t)trans[addr[3]];
 359 +       *indices2 = (xmm_t){v.d32[0], v.d32[1], v.d32[2], v.d32[3]};
 360 +
 361 +       return vec_sr(next_input,
 362 +               (vector unsigned int){CHAR_BIT, CHAR_BIT, CHAR_BIT, CHAR_BIT});
 363 +}
 364 +
 365 +/*
 366 + * Execute trie traversal with 8 traversals in parallel
 367 + */
 368 +static inline int
 369 +search_altivec_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
 370 +       uint32_t *results, uint32_t total_packets, uint32_t categories)
 371 +{
 372 +       int n;
 373 +       struct acl_flow_data flows;
 374 +       uint64_t index_array[MAX_SEARCHES_ALTIVEC8];
 375 +       struct completion cmplt[MAX_SEARCHES_ALTIVEC8];
 376 +       struct parms parms[MAX_SEARCHES_ALTIVEC8];
 377 +       xmm_t input0, input1;
 378 +
 379 +       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
 380 +               total_packets, categories, ctx->trans_table);
 381 +
 382 +       for (n = 0; n < MAX_SEARCHES_ALTIVEC8; n++) {
 383 +               cmplt[n].count = 0;
 384 +               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
 385 +       }
 386 +
 387 +        /* Check for any matches. */
 388 +       acl_match_check_x4(0, ctx, parms, &flows, (uint64_t *)&index_array[0]);
 389 +       acl_match_check_x4(4, ctx, parms, &flows, (uint64_t *)&index_array[4]);
 390 +
 391 +       while (flows.started > 0) {
 392 +
 393 +               /* Gather 4 bytes of input data for each stream. */
 394 +               input0 = (xmm_t){GET_NEXT_4BYTES(parms, 0),
 395 +                               GET_NEXT_4BYTES(parms, 1),
 396 +                               GET_NEXT_4BYTES(parms, 2),
 397 +                               GET_NEXT_4BYTES(parms, 3)};
 398 +
 399 +               input1 = (xmm_t){GET_NEXT_4BYTES(parms, 4),
 400 +                               GET_NEXT_4BYTES(parms, 5),
 401 +                               GET_NEXT_4BYTES(parms, 6),
 402 +                               GET_NEXT_4BYTES(parms, 7)};
 403 +
 404 +                /* Process the 4 bytes of input on each stream. */
 405 +
 406 +               input0 = transition4(input0, flows.trans,
 407 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 408 +               input1 = transition4(input1, flows.trans,
 409 +                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
 410 +
 411 +               input0 = transition4(input0, flows.trans,
 412 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 413 +               input1 = transition4(input1, flows.trans,
 414 +                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
 415 +
 416 +               input0 = transition4(input0, flows.trans,
 417 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 418 +               input1 = transition4(input1, flows.trans,
 419 +                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
 420 +
 421 +               input0 = transition4(input0, flows.trans,
 422 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 423 +               input1 = transition4(input1, flows.trans,
 424 +                       (xmm_t *)&index_array[4], (xmm_t *)&index_array[6]);
 425 +
 426 +                /* Check for any matches. */
 427 +               acl_match_check_x4(0, ctx, parms, &flows,
 428 +                       (uint64_t *)&index_array[0]);
 429 +               acl_match_check_x4(4, ctx, parms, &flows,
 430 +                       (uint64_t *)&index_array[4]);
 431 +       }
 432 +
 433 +       return 0;
 434 +}
 435 +
 436 +/*
 437 + * Execute trie traversal with 4 traversals in parallel
 438 + */
 439 +static inline int
 440 +search_altivec_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
 441 +        uint32_t *results, int total_packets, uint32_t categories)
 442 +{
 443 +       int n;
 444 +       struct acl_flow_data flows;
 445 +       uint64_t index_array[MAX_SEARCHES_ALTIVEC4];
 446 +       struct completion cmplt[MAX_SEARCHES_ALTIVEC4];
 447 +       struct parms parms[MAX_SEARCHES_ALTIVEC4];
 448 +       xmm_t input;
 449 +
 450 +       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
 451 +               total_packets, categories, ctx->trans_table);
 452 +
 453 +       for (n = 0; n < MAX_SEARCHES_ALTIVEC4; n++) {
 454 +               cmplt[n].count = 0;
 455 +               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
 456 +       }
 457 +
 458 +       /* Check for any matches. */
 459 +       acl_match_check_x4(0, ctx, parms, &flows, index_array);
 460 +
 461 +       while (flows.started > 0) {
 462 +
 463 +               /* Gather 4 bytes of input data for each stream. */
 464 +               input = (xmm_t){GET_NEXT_4BYTES(parms, 0),
 465 +                               GET_NEXT_4BYTES(parms, 1),
 466 +                               GET_NEXT_4BYTES(parms, 2),
 467 +                               GET_NEXT_4BYTES(parms, 3)};
 468 +
 469 +               /* Process the 4 bytes of input on each stream. */
 470 +               input = transition4(input, flows.trans,
 471 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 472 +               input = transition4(input, flows.trans,
 473 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 474 +               input = transition4(input, flows.trans,
 475 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 476 +               input = transition4(input, flows.trans,
 477 +                       (xmm_t *)&index_array[0], (xmm_t *)&index_array[2]);
 478 +
 479 +               /* Check for any matches. */
 480 +               acl_match_check_x4(0, ctx, parms, &flows, index_array);
 481 +       }
 482 +
 483 +       return 0;
 484 +}
 485 diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
 486 index 4ba9786..8b7e92c 100644
 487 --- a/lib/librte_acl/rte_acl.c
 488 +++ b/lib/librte_acl/rte_acl.c
 489 @@ -75,12 +75,23 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
 490         return -ENOTSUP;
 491  }
 492
 493 +int __attribute__ ((weak))
 494 +rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
 495 +       __rte_unused const uint8_t **data,
 496 +       __rte_unused uint32_t *results,
 497 +       __rte_unused uint32_t num,
 498 +       __rte_unused uint32_t categories)
 499 +{
 500 +       return -ENOTSUP;
 501 +}
 502 +
 503  static const rte_acl_classify_t classify_fns[] = {
 504         [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar,
 505         [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar,
 506         [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse,
 507         [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2,
 508         [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon,
 509 +       [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec,
 510  };
 511
 512  /* by default, use always available scalar code path. */
 513 @@ -119,6 +130,8 @@ rte_acl_init(void)
 514  #elif defined(RTE_ARCH_ARM)
 515         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON))
 516                 alg =  RTE_ACL_CLASSIFY_NEON;
 517 +#elif defined(RTE_ARCH_PPC_64)
 518 +       alg = RTE_ACL_CLASSIFY_ALTIVEC;
 519  #else
 520  #ifdef CC_AVX2_SUPPORT
 521         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
 522 diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
 523 index 0979a09..8d4e2a6 100644
 524 --- a/lib/librte_acl/rte_acl.h
 525 +++ b/lib/librte_acl/rte_acl.h
 526 @@ -271,6 +271,7 @@ enum rte_acl_classify_alg {
 527         RTE_ACL_CLASSIFY_SSE = 2,     /**< requires SSE4.1 support. */
 528         RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
 529         RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
 530 +       RTE_ACL_CLASSIFY_ALTIVEC = 5,    /**< requires ALTIVEC support. */
 531         RTE_ACL_CLASSIFY_NUM          /* should always be the last one. */
 532  };
 533
 534 --
 535 1.9.1
 536