New upstream version 18.02
[deb_dpdk.git] / lib / librte_acl / acl_vect.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #ifndef _RTE_ACL_VECT_H_
6 #define _RTE_ACL_VECT_H_
7
8 /**
9  * @file
10  *
11  * RTE ACL SSE/AVX related header.
12  */
13
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17
18
19 /*
20  * Takes 2 SIMD registers containing N transitions each (tr0, tr1),
21  * casts them to TC and shuffles them with _<P>_shuffle_ps into:
22  * lo - contains low 32 bits of given N transitions (selector 0x88).
23  * hi - contains high 32 bits of given N transitions (selector 0xdd).
24  */
25 #define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
26         lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
27         hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
28 } while (0)
29
30
31 /*
32  * Calculate the address of the next transition for all types of nodes.
33  * Only DFA nodes and range nodes actually transition to another node;
34  * match nodes are not supposed to be encountered here.
35  * P and S pick the intrinsic family (_<P>_..._si<S>). The result is stored
36  * in addr (an lvalue); several arguments are expanded more than once.
37  * For quad range nodes:
38  * Calculate number of range boundaries that are less than the input
39  * value. Range boundaries for each node are in signed 8 bit, ordered
40  * from -128 to 127. This is effectively a popcnt of the boundary bytes
41  * (tr_hi) that the input byte is greater than.
42  * Single nodes are processed in the same way as quad range nodes.
43 */
44 #define ACL_TR_CALC_ADDR(P, S,                                  \
45         addr, index_mask, next_input, shuffle_input,            \
46         ones_16, range_base, tr_lo, tr_hi)               do {   \
47         /* NB: args must not be named like these locals. */     \
48         typeof(addr) in, node_type, r, t;                       \
49         typeof(addr) dfa_msk, dfa_ofs, quad_ofs;                \
50         /* t = 0; in = next_input shuffled by shuffle_input */  \
51         t = _##P##_xor_si##S(index_mask, index_mask);           \
52         in = _##P##_shuffle_epi8(next_input, shuffle_input);    \
53                                                                 \
54         /* Split tr_lo by index_mask: node type and addr */     \
55         node_type = _##P##_andnot_si##S(index_mask, tr_lo);     \
56         addr = _##P##_and_si##S(index_mask, tr_lo);             \
57                                                                 \
58         /* 32-bit lane mask for DFA type(0) nodes */            \
59         dfa_msk = _##P##_cmpeq_epi32(node_type, t);             \
60                                                                 \
61         /* DFA: ofs = (in >> 24) - tr_hi bytes picked by r */   \
62         r = _##P##_srli_epi32(in, 30);                          \
63         r = _##P##_add_epi8(r, range_base);                     \
64         t = _##P##_srli_epi32(in, 24);                          \
65         r = _##P##_shuffle_epi8(tr_hi, r);                      \
66                                                                 \
67         dfa_ofs = _##P##_sub_epi32(t, r);                       \
68         /* QUAD/SINGLE: count bytes where in > tr_hi: */        \
69         /* cmpgt -> 0xff, sign -> 1, madds sum neighbours */    \
70         t = _##P##_cmpgt_epi8(in, tr_hi);                       \
71         t = _##P##_sign_epi8(t, t);                             \
72         t = _##P##_maddubs_epi16(t, t);                         \
73         quad_ofs = _##P##_madd_epi16(t, ones_16);               \
74                                                                 \
75         /* take dfa_ofs where dfa_msk is set, else quad_ofs. */ \
76         t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);     \
77                                                                 \
78         /* calculate address for next transitions. */           \
79         addr = _##P##_add_epi32(addr, t);                       \
80 } while (0)
81
82
83 #ifdef __cplusplus
84 }
85 #endif
86
87 #endif /* _RTE_ACL_VECT_H_ */