2 * Copyright (c) 2016 Intel Corporation.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
/*
 * Compare a 256-bit destination value against (source & mask).
 * ORs together the per-lane differences of the four 64-bit lanes, so the
 * accumulated value is zero iff (sa & sm) == da across all 256 bits.
 * NOTE(review): fragment -- the return type and the final return statement
 * are outside this view; presumably 'ret' is converted to a match/no-match
 * status. Confirm against the full source before relying on the polarity.
 */
ymm_mask_cmp(const _ymm_t *da, const _ymm_t *sa, const _ymm_t *sm)
	/* fold (masked source XOR destination) over all four 64-bit lanes */
	ret = ((sa->u64[0] & sm->u64[0]) ^ da->u64[0]) |
		((sa->u64[1] & sm->u64[1]) ^ da->u64[1]) |
		((sa->u64[2] & sm->u64[2]) ^ da->u64[2]) |
		((sa->u64[3] & sm->u64[3]) ^ da->u64[3]);
/*
 * Setup tx_offload field inside mbuf using raw 64-bit field.
 * Consider to move it into DPDK librte_mbuf.
 *
 * Bit layout (given by the shift amounts below):
 *   il2: bits 0-6, il3: bits 7-15, il4: bits 16-23, tso: bits 24-39,
 *   ol3: bits 40-48, ol2: bits 49-63.
 */
static inline uint64_t
_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
	uint64_t ol3, uint64_t ol2)
{
	return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
}
/*
 * Given the value of mbuf's tx_offload, calculate L4 payload offset
 * (l2_len + l3_len + l4_len).
 */
static inline uint32_t
_tx_offload_l4_offset(uint64_t ofl)
{
	uint32_t l2, l3, l4;

	/* field positions/widths match the packing in _mbuf_tx_offload(). */
	l2 = ofl & 0x7f;
	l3 = ofl >> 7 & 0x1ff;
	l4 = ofl >> 16 & UINT8_MAX;

	return l2 + l3 + l4;
}
63 * Routines to calculate L3/L4 checksums in SW.
64 * Pretty similar to ones from DPDK librte_net/rte_ip.h,
65 * but provide better performance (at least for tested configurations),
66 * and extended functionality.
67 * Consider to move them into DPDK librte_net/rte_ip.h.
/* make the compiler generate: add %r1, %r2; adc $0, %r1. */
#define CKSUM_ADD_CARRY(s, v) do {               \
	(s) += (v);                              \
	(s) = ((s) < (v)) ? (s) + 1 : (s);       \
} while (0)
/**
 * Process the non-complemented checksum of a buffer.
 * Similar to rte_raw_cksum(), but provides better performance
 * (at least on IA platforms).
 * NOTE(review): the 8-byte wide accumulation assumes little-endian
 * byte order -- confirm if ever built for big-endian targets.
 * @param buf
 *   Pointer to the buffer.
 * @param size
 *   Length of the buffer.
 * @return
 *   The non-complemented checksum.
 */
static inline uint16_t
__raw_cksum(const uint8_t *buf, uint32_t size)
{
	uint64_t s, sum;
	uint32_t i, n;
	uint32_t dw1, dw2;
	uint16_t w1, w2;
	const uint64_t *b;

	b = (const uint64_t *)buf;
	n = size / sizeof(*b);
	sum = 0;

	/* main loop, consume 8 bytes per iteration. */
	for (i = 0; i != n; i++) {
		s = b[i];
		/* add with end-around carry (same op as CKSUM_ADD_CARRY). */
		sum += s;
		sum += (sum < s);
	}

	/* consume the remainder (1..7 trailing bytes). */
	n = size % sizeof(*b);
	if (n != 0) {
		/* assemble the tail bytes into the low end of a zero-padded
		 * word; unlike reading the last full 8 bytes and shifting,
		 * this never touches memory outside [buf, buf + size). */
		const uint8_t *tail = (const uint8_t *)(b + i);
		uint32_t k;

		s = 0;
		for (k = 0; k != n; k++)
			s |= (uint64_t)tail[k] << (k * CHAR_BIT);
		sum += s;
		sum += (sum < s);
	}

	/* reduce to 16 bits, folding carries at each step. */
	dw1 = (uint32_t)sum;
	dw2 = (uint32_t)(sum >> 32);
	dw1 += dw2;
	dw1 += (dw1 < dw2);

	w1 = (uint16_t)dw1;
	w2 = (uint16_t)(dw1 >> 16);
	w1 += w2;
	w1 += (w1 < w2);
	return w1;
}
129 * Process UDP or TCP checksum over possibly multi-segmented packet.
131 * The pointer to the mbuf with the packet.
133 * Offset to the beginning of the L4 header (should be in first segment).
135 * Already pre-calculated pseudo-header checksum value.
137 * The complemented checksum.
139 static inline uint32_t
140 __udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
143 uint32_t dlen, i, plen;
144 const struct rte_mbuf *ms;
147 plen = rte_pktmbuf_pkt_len(mb);
150 for (i = l4_ofs; i < plen && ms != NULL; i += dlen) {
151 data = rte_pktmbuf_mtod_offset(ms, const void *, l4_ofs);
152 dlen = rte_pktmbuf_data_len(ms) - l4_ofs;
153 cksum += __raw_cksum(data, dlen);
158 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
159 cksum = (~cksum) & 0xffff;
167 * Process the pseudo-header checksum of an IPv4 header.
169 * Depending on the ol_flags, the pseudo-header checksum expected by the
170 * drivers is not the same. For instance, when TSO is enabled, the IP
171 * payload length must not be included in the packet.
173 * When ol_flags is 0, it computes the standard pseudo-header checksum.
176 * The pointer to the contiguous IPv4 header.
178 * Length of the IPv4 header.
180 * The ol_flags of the associated mbuf.
182 * The non-complemented checksum to set in the L4 header.
184 static inline uint16_t
185 _ipv4x_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, size_t ipv4h_len,
190 s0 = ipv4_hdr->src_addr;
191 s1 = ipv4_hdr->dst_addr;
192 CKSUM_ADD_CARRY(s0, s1);
194 if (ol_flags & PKT_TX_TCP_SEG)
197 s1 = rte_cpu_to_be_16(
198 (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
201 s1 += rte_cpu_to_be_16(ipv4_hdr->next_proto_id);
202 CKSUM_ADD_CARRY(s0, s1);
204 return __rte_raw_cksum_reduce(s0);
208 * Process the IPv4 UDP or TCP checksum.
211 * The pointer to the IPv4 packet.
213 * Offset to the beginning of the L4 header (should be in first segment).
215 * The pointer to the contiguous IPv4 header.
217 * The complemented checksum to set in the IP packet.
220 _ipv4_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
221 const struct ipv4_hdr *ipv4_hdr)
225 cksum = _ipv4x_phdr_cksum(ipv4_hdr, mb->l3_len, 0);
226 cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
/**
 * Process the IPv6 UDP or TCP checksum.
 * @param mb
 *   The pointer to the IPv6 packet.
 * @param l4_ofs
 *   Offset to the beginning of the L4 header (should be in first segment).
 * @param ipv6_hdr
 *   The pointer to the contiguous IPv6 header.
 * @return
 *   The complemented checksum to set in the IP packet.
 */
static inline uint16_t
_ipv6_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
	const struct ipv6_hdr *ipv6_hdr)
{
	uint32_t cksum;

	/* pseudo-header sum first, then fold in the L4 data. */
	cksum = rte_ipv6_phdr_cksum(ipv6_hdr, 0);
	cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);

	return cksum;
}
/*
 * Compute the complemented checksum of a contiguous IPv4 header of
 * 'len' bytes. A raw sum of 0xffff is returned unchanged (its complement
 * would be 0), so the result is never zero.
 */
static inline uint16_t
_ipv4x_cksum(const void *iph, size_t len)
{
	uint16_t cksum;

	cksum = __raw_cksum(iph, len);
	return (cksum == 0xffff) ? cksum : (uint16_t)~cksum;
}
 * Analog of read-write locks, very much in favour of the read side.
 * Assumes that there are no more than INT32_MAX concurrent readers.
 * Consider to move into DPDK librte_eal.
272 rwl_try_acquire(rte_atomic32_t *p)
274 return rte_atomic32_add_return(p, 1);
278 rwl_release(rte_atomic32_t *p)
280 rte_atomic32_sub(p, 1);
284 rwl_acquire(rte_atomic32_t *p)
288 rc = rwl_try_acquire(p);
295 rwl_down(rte_atomic32_t *p)
297 while (rte_atomic32_cmpset((volatile uint32_t *)p, 0, INT32_MIN) == 0)
302 rwl_up(rte_atomic32_t *p)
304 rte_atomic32_sub(p, INT32_MIN);
311 #endif /* _MISC_H_ */