/* * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. * Copyright 2007 Nuova Systems, Inc. All rights reserved. * * Copyright (c) 2014, Cisco Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef _VNIC_WQ_H_ #define _VNIC_WQ_H_ #include "vnic_dev.h" #include "vnic_cq.h" /* Work queue control */ struct vnic_wq_ctrl { u64 ring_base; /* 0x00 */ u32 ring_size; /* 0x08 */ u32 pad0; u32 posted_index; /* 0x10 */ u32 pad1; u32 cq_index; /* 0x18 */ u32 pad2; u32 enable; /* 0x20 */ u32 pad3; u32 running; /* 0x28 */ u32 pad4; u32 fetch_index; /* 0x30 */ u32 pad5; u32 dca_value; /* 0x38 */ u32 pad6; u32 error_interrupt_enable; /* 0x40 */ u32 pad7; u32 error_interrupt_offset; /* 0x48 */ u32 pad8; u32 error_status; /* 0x50 */ u32 pad9; }; struct vnic_wq_buf { struct vnic_wq_buf *next; dma_addr_t dma_addr; void *os_buf; unsigned int len; unsigned int index; int sop; void *desc; uint64_t wr_id; /* Cookie */ uint8_t cq_entry; /* Gets completion event from hw */ uint8_t desc_skip_cnt; /* Num descs to occupy */ uint8_t compressed_send; /* Both hdr and payload in one desc */ }; /* Break the vnic_wq_buf allocations into blocks of 32/64 entries */ #define VNIC_WQ_BUF_MIN_BLK_ENTRIES 32 #define VNIC_WQ_BUF_DFLT_BLK_ENTRIES 64 #define VNIC_WQ_BUF_BLK_ENTRIES(entries) \ ((unsigned int)((entries < VNIC_WQ_BUF_DFLT_BLK_ENTRIES) ? \ VNIC_WQ_BUF_MIN_BLK_ENTRIES : VNIC_WQ_BUF_DFLT_BLK_ENTRIES)) #define VNIC_WQ_BUF_BLK_SZ(entries) \ (VNIC_WQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_wq_buf)) #define VNIC_WQ_BUF_BLKS_NEEDED(entries) \ DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES(entries)) #define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096) struct vnic_wq { unsigned int index; struct vnic_dev *vdev; struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ struct vnic_dev_ring ring; struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX]; struct vnic_wq_buf *to_use; struct vnic_wq_buf *to_clean; unsigned int pkts_outstanding; unsigned int socket_id; }; static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) { /* how many does SW own? */ return wq->ring.desc_avail; } static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) { /* how many does HW own? */ return wq->ring.desc_count - wq->ring.desc_avail - 1; } static inline void *vnic_wq_next_desc(struct vnic_wq *wq) { return wq->to_use->desc; } #define PI_LOG2_CACHE_LINE_SIZE 5 #define PI_INDEX_BITS 12 #define PI_INDEX_MASK ((1U << PI_INDEX_BITS) - 1) #define PI_PREFETCH_LEN_MASK ((1U << PI_LOG2_CACHE_LINE_SIZE) - 1) #define PI_PREFETCH_LEN_OFF 16 #define PI_PREFETCH_ADDR_BITS 43 #define PI_PREFETCH_ADDR_MASK ((1ULL << PI_PREFETCH_ADDR_BITS) - 1) #define PI_PREFETCH_ADDR_OFF 21 /** How many cache lines are touched by buffer (addr, len). */ static inline unsigned int num_cache_lines_touched(dma_addr_t addr, unsigned int len) { const unsigned long mask = PI_PREFETCH_LEN_MASK; const unsigned long laddr = (unsigned long)addr; unsigned long lines, equiv_len; /* A. If addr is aligned, our solution is just to round up len to the next boundary. e.g. addr = 0, len = 48 +--------------------+ |XXXXXXXXXXXXXXXXXXXX| 32-byte cacheline a +--------------------+ |XXXXXXXXXX | cacheline b +--------------------+ B. If addr is not aligned, however, we may use an extra cacheline. e.g. addr = 12, len = 22 +--------------------+ | XXXXXXXXXXXXX| +--------------------+ |XX | +--------------------+ Our solution is to make the problem equivalent to case A above by adding the empty space in the first cacheline to the length: unsigned long len; +--------------------+ |eeeeeeeXXXXXXXXXXXXX| "e" is empty space, which we add to len +--------------------+ |XX | +--------------------+ */ equiv_len = len + (laddr & mask); /* Now we can just round up this len to the next 32-byte boundary. */ lines = (equiv_len + mask) & (~mask); /* Scale bytes -> cachelines. */ return lines >> PI_LOG2_CACHE_LINE_SIZE; } static inline u64 vnic_cached_posted_index(dma_addr_t addr, unsigned int len, unsigned int index) { unsigned int num_cache_lines = num_cache_lines_touched(addr, len); /* Wish we could avoid a branch here. We could have separate * vnic_wq_post() and vinc_wq_post_inline(), the latter * only supporting < 1k (2^5 * 2^5) sends, I suppose. This would * eliminate the if (eop) branch as well. */ if (num_cache_lines > PI_PREFETCH_LEN_MASK) num_cache_lines = 0; return (index & PI_INDEX_MASK) | ((num_cache_lines & PI_PREFETCH_LEN_MASK) << PI_PREFETCH_LEN_OFF) | (((addr >> PI_LOG2_CACHE_LINE_SIZE) & PI_PREFETCH_ADDR_MASK) << PI_PREFETCH_ADDR_OFF); } static inline void vnic_wq_post(struct vnic_wq *wq, void *os_buf, dma_addr_t dma_addr, unsigned int len, int sop, int eop, uint8_t desc_skip_cnt, uint8_t cq_entry, uint8_t compressed_send, uint64_t wrid) { struct vnic_wq_buf *buf = wq->to_use; buf->sop = sop; buf->cq_entry = cq_entry; buf->compressed_send = compressed_send; buf->desc_skip_cnt = desc_skip_cnt; buf->os_buf = os_buf; buf->dma_addr = dma_addr; buf->len = len; buf->wr_id = wrid; buf = buf->next; if (eop) { #ifdef DO_PREFETCH uint64_t wr = vnic_cached_posted_index(dma_addr, len, buf->index); #endif /* Adding write memory barrier prevents compiler and/or CPU * reordering, thus avoiding descriptor posting before * descriptor is initialized. Otherwise, hardware can read * stale descriptor fields. */ wmb(); #ifdef DO_PREFETCH /* Intel chipsets seem to limit the rate of PIOs that we can * push on the bus. Thus, it is very important to do a single * 64 bit write here. With two 32-bit writes, my maximum * pkt/sec rate was cut almost in half. -AJF */ iowrite64((uint64_t)wr, &wq->ctrl->posted_index); #else iowrite32(buf->index, &wq->ctrl->posted_index); #endif } wq->to_use = buf; wq->ring.desc_avail -= desc_skip_cnt; } static inline void vnic_wq_service(struct vnic_wq *wq, struct cq_desc *cq_desc, u16 completed_index, void (*buf_service)(struct vnic_wq *wq, struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque), void *opaque) { struct vnic_wq_buf *buf; buf = wq->to_clean; while (1) { (*buf_service)(wq, cq_desc, buf, opaque); wq->ring.desc_avail++; wq->to_clean = buf->next; if (buf->index == completed_index) break; buf = wq->to_clean; } } void vnic_wq_free(struct vnic_wq *wq); int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, unsigned int desc_count, unsigned int desc_size); void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, unsigned int fetch_index, unsigned int posted_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset); void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset); void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error); unsigned int vnic_wq_error_status(struct vnic_wq *wq); void vnic_wq_enable(struct vnic_wq *wq); int vnic_wq_disable(struct vnic_wq *wq); void vnic_wq_clean(struct vnic_wq *wq, void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)); int vnic_wq_mem_size(struct vnic_wq *wq, unsigned int desc_count, unsigned int desc_size); #endif /* _VNIC_WQ_H_ */