/*
 * Copyright (c) 2016 Intel Corporation.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include <tle_dpdk_wrapper.h>
/*
 * Compare two 128-bit values (as a pair of 64-bit lanes).
 * XORs each lane and ORs the results: the accumulated value is 0 iff
 * *da and *sa are bit-for-bit equal.
 * NOTE(review): truncated in this chunk — the return type line, the
 * declaration of `ret` and the return statement are not visible.
 */
26 xmm_cmp(const rte_xmm_t *da, const rte_xmm_t *sa)
30 ret = (sa->u64[0] ^ da->u64[0]) |
31 (sa->u64[1] ^ da->u64[1]);
/*
 * Compare two 256-bit values (four 64-bit lanes): the accumulated value
 * is 0 iff *da and *sa are bit-for-bit equal.
 * NOTE(review): truncated in this chunk — the return type line, the
 * declaration of `ret` and the return statement are not visible.
 */
37 ymm_cmp(const _ymm_t *da, const _ymm_t *sa)
41 ret = (sa->u64[0] ^ da->u64[0]) |
42 (sa->u64[1] ^ da->u64[1]) |
43 (sa->u64[2] ^ da->u64[2]) |
44 (sa->u64[3] ^ da->u64[3]);
/*
 * Masked compare of two 256-bit values: each 64-bit lane of *da is
 * ANDed with the corresponding lane of mask *sm before being XORed
 * against *sa; the accumulated value is 0 iff all masked lanes match.
 * NOTE(review): truncated in this chunk — the return type line, the
 * declaration of `ret` and the return statement are not visible.
 */
50 ymm_mask_cmp(const _ymm_t *da, const _ymm_t *sa, const _ymm_t *sm)
54 ret = ((da->u64[0] & sm->u64[0]) ^ sa->u64[0]) |
55 ((da->u64[1] & sm->u64[1]) ^ sa->u64[1]) |
56 ((da->u64[2] & sm->u64[2]) ^ sa->u64[2]) |
57 ((da->u64[3] & sm->u64[3]) ^ sa->u64[3]);
/*
 * Setup tx_offload field inside mbuf using raw 64-bit field.
 * Consider to move it into DPDK librte_mbuf.
 */
static inline uint64_t
_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
	uint64_t ol3, uint64_t ol2)
{
	uint64_t ofl;

	/* inner L2/L3/L4 header lengths: bits [0, 7), [7, 16), [16, 24). */
	ofl = il2;
	ofl |= il3 << 7;
	ofl |= il4 << 16;

	/* tso / outer L3 / outer L2 fields: bits [24, 40), [40, 49), [49, ...). */
	ofl |= tso << 24;
	ofl |= ol3 << 40;
	ofl |= ol2 << 49;

	return ofl;
}
/*
 * Given the value of mbuf's tx_offload, calculate L4 payload offset,
 * i.e. the sum of the inner L2, L3 and L4 header lengths.
 * Field layout must match _mbuf_tx_offload():
 * l2 - bits [0, 7), l3 - bits [7, 16), l4 - bits [16, 24).
 * NOTE(review): this function was truncated in the source as received
 * (the l2 extraction and the return were missing); reconstructed from
 * the bit layout established by _mbuf_tx_offload().
 */
static inline uint32_t
_tx_offload_l4_offset(uint64_t ofl)
{
	uint32_t l2, l3, l4;

	l2 = ofl & 0x7f;
	l3 = ofl >> 7 & 0x1ff;
	l4 = ofl >> 16 & UINT8_MAX;

	return l2 + l3 + l4;
}
89 * Routines to calculate L3/L4 checksums in SW.
90 * Pretty similar to ones from DPDK librte_net/rte_ip.h,
91 * but provide better performance (at least for tested configurations),
92 * and extended functionality.
93 * Consider to move them into DPDK librte_net/rte_ip.h.
/* make compiler to generate: add %r1, %r2; adc $0, %r1. */
/*
 * Add v to s with end-around carry (ones-complement style): if the
 * addition wrapped (s < v after the add), add the carry back in.
 * NOTE(review): the macro was truncated in the source as received (the
 * add itself and the do/while terminator were missing); reconstructed
 * from the add/adc intent documented above. Both arguments may be
 * evaluated more than once — pass only side-effect-free lvalues.
 */
#define CKSUM_ADD_CARRY(s, v) do {             \
	(s) += (v);                            \
	(s) = ((s) < (v)) ? (s) + 1 : (s);     \
} while (0)
103 * Process the non-complemented checksum of a buffer.
104 * Similar to rte_raw_cksum(), but provide better performance
105 * (at least on IA platforms).
107 * Pointer to the buffer.
109 * Length of the buffer.
111 * The non-complemented checksum.
/*
 * Non-complemented checksum of a buffer, consumed 8 bytes at a time,
 * using CKSUM_ADD_CARRY to keep the end-around carries.
 * NOTE(review): truncated in this chunk — local declarations, the
 * 64-bit load feeding each CKSUM_ADD_CARRY, the guard around the
 * remainder handling, the 64->32->16 bit reduction assignments and the
 * return statement are not visible. Do not edit without full source.
 */
113 static inline uint16_t
114 __raw_cksum(const uint8_t *buf, uint32_t size)
/* view the buffer as 64-bit words; n = number of whole words. */
122 b = (const uint64_t *)buf;
123 n = size / sizeof(*b);
126 /* main loop, consume 8 bytes per iteration. */
127 for (i = 0; i != n; i++) {
129 CKSUM_ADD_CARRY(sum, s);
132 /* consume the remainder. */
133 n = size % sizeof(*b);
135 /* position of the of last 8 bytes of data. */
136 b = (const uint64_t *)((uintptr_t)(b + i) + n - sizeof(*b));
137 /* calculate shift amount. */
/* the shift discards bytes of the last word already counted above. */
138 n = (sizeof(*b) - n) * CHAR_BIT;
140 CKSUM_ADD_CARRY(sum, s);
143 /* reduce to 16 bits */
146 CKSUM_ADD_CARRY(dw1, dw2);
149 CKSUM_ADD_CARRY(w1, w2);
154 * Process UDP or TCP checksum over possibly multi-segmented packet.
156 * The pointer to the mbuf with the packet.
158 * Offset to the beginning of the L4 header (should be in first segment).
160 * Already pre-calculated pseudo-header checksum value.
162 * The complemented checksum.
/*
 * UDP/TCP checksum over a possibly multi-segmented packet, starting at
 * l4_ofs, folded to 16 bits and complemented at the end.
 * NOTE(review): truncated in this chunk — the third parameter line
 * (`cksum`, seeded by callers with the pre-calculated pseudo-header
 * sum), the `data`/`ms` declarations and initialization, the
 * segment-advance statements inside the loop (presumably ms = ms->next
 * with l4_ofs reset for subsequent segments — confirm against full
 * source) and the return statement are not visible.
 */
164 static inline uint32_t
165 __udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
168 uint32_t dlen, i, plen;
169 const struct rte_mbuf *ms;
/* total packet length bounds the walk over the segment chain. */
172 plen = rte_pktmbuf_pkt_len(mb);
175 for (i = l4_ofs; i < plen && ms != NULL; i += dlen) {
/* checksum this segment's data past l4_ofs. */
176 data = rte_pktmbuf_mtod_offset(ms, const void *, l4_ofs);
177 dlen = rte_pktmbuf_data_len(ms) - l4_ofs;
178 cksum += __raw_cksum(data, dlen);
/* fold the 32-bit accumulator to 16 bits, then complement. */
183 cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
184 cksum = (~cksum) & 0xffff;
192 * Process the pseudo-header checksum of an IPv4 header.
194 * Depending on the ol_flags, the pseudo-header checksum expected by the
195 * drivers is not the same. For instance, when TSO is enabled, the IP
196 * payload length must not be included in the packet.
198 * When ol_flags is 0, it computes the standard pseudo-header checksum.
201 * The pointer to the contiguous IPv4 header.
203 * Length of the IPv4 header.
205 * The ol_flags of the associated mbuf.
207 * The non-complemented checksum to set in the L4 header.
/*
 * IPv4 pseudo-header checksum for UDP/TCP (non-complemented).
 * NOTE(review): truncated in this chunk — the opening brace, the
 * declarations of s0/s1, the body of the PKT_TX_TCP_SEG branch and the
 * matching else are not visible; per the description above, with TSO
 * the L4 length is excluded from the sum.
 */
209 static inline uint16_t
210 _ipv4x_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, size_t ipv4h_len,
/* sum the source and destination addresses with end-around carry. */
215 s0 = ipv4_hdr->src_addr;
216 s1 = ipv4_hdr->dst_addr;
217 CKSUM_ADD_CARRY(s0, s1);
219 if (ol_flags & PKT_TX_TCP_SEG)
/* L4 length: total_length minus (presumably) ipv4h_len — the
 * continuation of this expression is not visible in this chunk. */
222 s1 = rte_cpu_to_be_16(
223 (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
226 s1 += rte_cpu_to_be_16(ipv4_hdr->next_proto_id);
227 CKSUM_ADD_CARRY(s0, s1);
/* fold the 32-bit sum to the 16-bit non-complemented checksum. */
229 return __rte_raw_cksum_reduce(s0);
233 * Process the IPv4 UDP or TCP checksum.
236 * The pointer to the IPv4 packet.
238 * Offset to the beginning of the L4 header (should be in first segment).
240 * The pointer to the contiguous IPv4 header.
242 * The complemented checksum to set in the IP packet.
/*
 * IPv4 UDP/TCP checksum over a (possibly multi-segment) mbuf:
 * pseudo-header sum first, then the L4 data.
 * NOTE(review): the return type line and the return statement are not
 * visible in this chunk.
 */
245 _ipv4_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
246 const struct ipv4_hdr *ipv4_hdr)
/* ol_flags == 0: standard (non-TSO) pseudo-header checksum. */
250 cksum = _ipv4x_phdr_cksum(ipv4_hdr, mb->l3_len, 0);
251 cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
257 * Process the IPv6 UDP or TCP checksum.
260 * The pointer to the IPv6 packet.
262 * Offset to the beginning of the L4 header (should be in first segment).
264 * The pointer to the contiguous IPv6 header.
266 * The complemented checksum to set in the IP packet.
/*
 * IPv6 UDP/TCP checksum over a (possibly multi-segment) mbuf: seed with
 * the IPv6 pseudo-header sum, then checksum the L4 data.
 * NOTE(review): the return type line and the return statement are not
 * visible in this chunk.
 */
269 _ipv6_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
270 const struct ipv6_hdr *ipv6_hdr)
/* ol_flags == 0: standard (non-TSO) pseudo-header checksum. */
274 cksum = rte_ipv6_phdr_cksum(ipv6_hdr, 0);
275 cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
/*
 * IPv4 header checksum: raw sum over the header, complemented unless
 * the sum is already all-ones.
 */
static inline uint16_t
_ipv4x_cksum(const void *iph, size_t len)
{
	uint16_t raw;

	/* non-complemented sum over the header bytes. */
	raw = __raw_cksum(iph, len);

	/* keep an all-ones sum as is, otherwise complement. */
	if (raw == 0xffff)
		return raw;
	return ~raw;
}
/*
 * Re-verify in SW the IP/L4 checksums that the HW flagged as bad.
 * NOTE(review): truncated in this chunk — the return type line, the
 * trailing parameter(s) of the signature (a `proto` value is used
 * below), the declarations of `csum`/`ret`, several braces and the
 * return statement are not visible.
 */
290 check_pkt_csum(const struct rte_mbuf *m, uint64_t ol_flags, uint32_t type,
293 const struct ipv4_hdr *l3h4;
294 const struct ipv6_hdr *l3h6;
295 const struct udp_hdr *l4h;
/* both views point at the L3 header; which is valid depends on `type`. */
300 l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len);
301 l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len);
/* HW marked the IPv4 header checksum bad: recompute and check in SW. */
303 if ((ol_flags & PKT_RX_IP_CKSUM_BAD) != 0) {
304 csum = _ipv4x_cksum(l3h4, m->l3_len);
305 ret = (csum != UINT16_MAX);
/* L3 is fine so far, but HW marked the L4 checksum bad: verify in SW. */
308 if (ret == 0 && (ol_flags & PKT_RX_L4_CKSUM_BAD) != 0) {
311 * for IPv4 it is allowed to have zero UDP cksum,
312 * for IPv6 valid UDP cksum is mandatory.
314 if (type == TLE_V4) {
315 l4h = (const struct udp_hdr *)((uintptr_t)l3h4 +
317 csum = (proto == IPPROTO_UDP && l4h->dgram_cksum == 0) ?
318 UINT16_MAX : _ipv4_udptcp_mbuf_cksum(m,
319 m->l2_len + m->l3_len, l3h4);
321 csum = _ipv6_udptcp_mbuf_cksum(m,
322 m->l2_len + m->l3_len, l3h6);
324 ret = (csum != UINT16_MAX);
331 * Analog of read-write locks, very much in favour of read side.
332 * Assumes, that there are no more then INT32_MAX concurrent readers.
333 * Consider to move into DPDK librte_eal.
/*
 * Reader-side acquire attempt: bump the reader count and return the new
 * value; it is negative when a writer holds the lock (see rwl_down()).
 * NOTE(review): the return type line and braces are not visible in this
 * chunk.
 */
337 rwl_try_acquire(rte_atomic32_t *p)
339 return rte_atomic32_add_return(p, 1);
/* reader-side release: undo the increment done by rwl_try_acquire(). */
343 rwl_release(rte_atomic32_t *p)
345 rte_atomic32_sub(p, 1);
/*
 * Blocking reader-side acquire.
 * NOTE(review): truncated in this chunk — the declaration of `rc`, the
 * handling of a failed attempt (presumably release-and-retry) and the
 * return statement are not visible.
 */
349 rwl_acquire(rte_atomic32_t *p)
353 rc = rwl_try_acquire(p);
/*
 * Writer-side acquire: spin until no readers hold the lock (counter is
 * 0), then bias the counter to INT32_MIN so rwl_try_acquire() callers
 * observe a negative value.
 * NOTE(review): the loop body (presumably a pause/yield) and the braces
 * are not visible in this chunk.
 */
360 rwl_down(rte_atomic32_t *p)
362 while (rte_atomic32_cmpset((volatile uint32_t *)p, 0, INT32_MIN) == 0)
/* writer-side release: remove the INT32_MIN bias set by rwl_down(). */
367 rwl_up(rte_atomic32_t *p)
369 rte_atomic32_sub(p, INT32_MIN);
372 /* exclude NULLs from the final list of packets. */
/*
 * Scans backwards, finds each run of NULL entries and shifts the
 * non-NULL tail down over it.
 * NOTE(review): truncated in this chunk — the actual copy/assignment
 * statements, the NULL-run bookkeeping and the return value (presumably
 * the compacted list length — confirm against full source) are not
 * visible.
 */
373 static inline uint32_t
374 compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
/* walk from the tail while NULL entries remain to be squeezed out. */
378 for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
381 if (pkt[j] == NULL) {
383 /* find how big is it. */
384 for (i = j; i-- != 0 && pkt[i] == NULL; )
/* shift the entries that follow the NULL run down over it. */
387 for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
399 /* empty ring and free queued mbufs */
/*
 * NOTE(review): truncated in this chunk — the return type line, the
 * declarations of n/i, braces and the outer loop that repeats the
 * dequeue burst until the ring is drained are not visible.
 */
401 empty_mbuf_ring(struct rte_ring *r)
404 struct rte_mbuf *mb[MAX_PKT_BURST];
/* dequeue up to MAX_PKT_BURST mbufs and free each one. */
407 n = _rte_ring_dequeue_burst(r, (void **)mb, RTE_DIM(mb));
408 for (i = 0; i != n; i++)
409 rte_pktmbuf_free(mb[i]);
417 #endif /* _MISC_H_ */