/*-
 *   BSD LICENSE
 *
 *   Copyright (c) 2017 Red Hat, Inc.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Red Hat, Inc. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
        TAILQ_ENTRY(vhost_iotlb_entry) next;

        uint64_t iova;   /* IO virtual address, as seen by the guest */
        uint64_t uaddr;  /* Host userspace virtual address backing it */
        uint64_t size;   /* Length of the mapping, in bytes */
        uint8_t perm;    /* Access permissions (VHOST_ACCESS_* flags) */
};

#define IOTLB_CACHE_SIZE 2048

/* Forward declaration for the pool-exhaustion fallback used below. */
static void vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);
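
/*
 * Illustrative example (made-up values): an entry with iova = 0x10000,
 * uaddr = 0x7f0000000000 and size = 0x2000 means guest IO virtual
 * addresses [0x10000, 0x12000) are backed by host virtual addresses
 * [0x7f0000000000, 0x7f0000002000), so iova 0x10800 translates to
 * uaddr + (0x10800 - 0x10000) = 0x7f0000000800.
 */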

/*
 * Flush the whole pending-miss list, returning every entry to the pool.
 */
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
        struct vhost_iotlb_entry *node, *temp_node;

        rte_rwlock_write_lock(&vq->iotlb_pending_lock);

        TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
                TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
                rte_mempool_put(vq->iotlb_pool, node);
        }

        rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

/*
 * Check whether an IOTLB miss for this (iova, perm) pair has already
 * been recorded and is still waiting for its reply.
 */
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
                                uint8_t perm)
{
        struct vhost_iotlb_entry *node;
        bool found = false;

        rte_rwlock_read_lock(&vq->iotlb_pending_lock);

        TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
                if ((node->iova == iova) && (node->perm == perm)) {
                        found = true;
                        break;
                }
        }

        rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

        return found;
}

/*
 * Record a pending IOTLB miss, so that the same miss is not reported
 * twice while a reply is still outstanding.
 */
void
vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
                                uint64_t iova, uint8_t perm)
{
        struct vhost_iotlb_entry *node;
        int ret;

        ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
        if (ret) {
                RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n");
                /*
                 * If the pending list is already empty, the pool has been
                 * drained by the cache: evict one cache entry instead, so
                 * that getting a free entry always makes progress.
                 */
                if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
                        vhost_user_iotlb_pending_remove_all(vq);
                else
                        vhost_user_iotlb_cache_random_evict(vq);
                ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
                if (ret) {
                        RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
                        return;
                }
        }

        node->iova = iova;
        node->perm = perm;

        rte_rwlock_write_lock(&vq->iotlb_pending_lock);

        TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

        rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}
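
/*
 * Illustrative sketch (not part of this file, loosely mirroring the
 * translation path in vhost.c): the two helpers above cooperate so a
 * given miss is only reported once to the vhost-user master while the
 * reply is outstanding.
 *
 *        if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
 *                vhost_user_iotlb_pending_insert(vq, iova, perm);
 *                vhost_user_iotlb_miss(dev, iova, perm);
 *        }
 */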

/*
 * Remove the pending misses falling in [iova, iova + size) whose
 * requested permissions are satisfied by perm, i.e. the misses answered
 * by a newly inserted cache entry.
 */
static void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
                                uint64_t iova, uint64_t size, uint8_t perm)
{
        struct vhost_iotlb_entry *node, *temp_node;

        rte_rwlock_write_lock(&vq->iotlb_pending_lock);

        TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
                if (node->iova < iova)
                        continue;
                if (node->iova >= iova + size)
                        continue;
                if ((node->perm & perm) != node->perm)
                        continue;
                TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
                rte_mempool_put(vq->iotlb_pool, node);
        }

        rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

/*
 * Flush the whole IOTLB cache, returning every entry to the pool.
 */
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
        struct vhost_iotlb_entry *node, *temp_node;

        rte_rwlock_write_lock(&vq->iotlb_lock);

        TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
                TAILQ_REMOVE(&vq->iotlb_list, node, next);
                rte_mempool_put(vq->iotlb_pool, node);
        }

        vq->iotlb_cache_nr = 0;

        rte_rwlock_write_unlock(&vq->iotlb_lock);
}

/*
 * Evict one randomly chosen cache entry to make room in the pool;
 * random eviction keeps the fast path free of any LRU accounting.
 * The caller must guarantee the cache is not empty
 * (vq->iotlb_cache_nr > 0), otherwise the modulo below would divide
 * by zero.
 */
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
        struct vhost_iotlb_entry *node, *temp_node;
        int entry_idx;

        rte_rwlock_write_lock(&vq->iotlb_lock);

        entry_idx = rte_rand() % vq->iotlb_cache_nr;

        TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
                if (!entry_idx) {
                        TAILQ_REMOVE(&vq->iotlb_list, node, next);
                        rte_mempool_put(vq->iotlb_pool, node);
                        vq->iotlb_cache_nr--;
                        break;
                }
                entry_idx--;
        }

        rte_rwlock_write_unlock(&vq->iotlb_lock);
}

/*
 * Insert a new translation into the cache, keeping the list sorted by
 * iova, and drop the pending misses this new entry answers.
 */
void
vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
                                uint64_t uaddr, uint64_t size, uint8_t perm)
{
        struct vhost_iotlb_entry *node, *new_node;
        int ret;

        ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
        if (ret) {
                RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n");
                /*
                 * If the cache itself is empty, the pool was drained by
                 * pending misses: reclaim those instead, and never call
                 * the random-evict helper on an empty cache.
                 */
                if (!TAILQ_EMPTY(&vq->iotlb_list))
                        vhost_user_iotlb_cache_random_evict(vq);
                else
                        vhost_user_iotlb_pending_remove_all(vq);
                ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
                if (ret) {
                        RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
                        return;
                }
        }

        new_node->iova = iova;
        new_node->uaddr = uaddr;
        new_node->size = size;
        new_node->perm = perm;

        rte_rwlock_write_lock(&vq->iotlb_lock);

        TAILQ_FOREACH(node, &vq->iotlb_list, next) {
                /*
                 * Entries must be invalidated before being updated.
                 * So if iova already in list, assume identical.
                 */
                if (node->iova == new_node->iova) {
                        rte_mempool_put(vq->iotlb_pool, new_node);
                        goto unlock;
                } else if (node->iova > new_node->iova) {
                        TAILQ_INSERT_BEFORE(node, new_node, next);
                        vq->iotlb_cache_nr++;
                        goto unlock;
                }
        }

        TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
        vq->iotlb_cache_nr++;

unlock:
        vhost_user_iotlb_pending_remove(vq, iova, size, perm);

        rte_rwlock_write_unlock(&vq->iotlb_lock);
}

/*
 * Invalidate the cache entries overlapping [iova, iova + size).
 */
void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
                                        uint64_t iova, uint64_t size)
{
        struct vhost_iotlb_entry *node, *temp_node;

        if (unlikely(!size))
                return;

        rte_rwlock_write_lock(&vq->iotlb_lock);

        TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
                /* Sorted by iova: no entry past the range can overlap */
                if (unlikely(iova + size < node->iova))
                        break;

                if (iova < node->iova + node->size) {
                        TAILQ_REMOVE(&vq->iotlb_list, node, next);
                        rte_mempool_put(vq->iotlb_pool, node);
                        vq->iotlb_cache_nr--;
                }
        }

        rte_rwlock_write_unlock(&vq->iotlb_lock);
}
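
/*
 * Illustrative sketch (an assumption, loosely mirroring the
 * VHOST_USER_IOTLB_MSG handler in vhost_user.c), where imsg is the
 * decoded IOTLB message and vva/perm are derived from it:
 *
 *        switch (imsg->type) {
 *        case VHOST_IOTLB_UPDATE:
 *                vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
 *                                imsg->size, perm);
 *                break;
 *        case VHOST_IOTLB_INVALIDATE:
 *                vhost_user_iotlb_cache_remove(vq, imsg->iova, imsg->size);
 *                break;
 *        }
 *
 * The device is expected to invalidate a mapping before updating it,
 * which is the invariant cache_insert() relies on.
 */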

/*
 * Translate iova into a host virtual address for an access of *size
 * bytes with permissions perm. The mapping may span several contiguous
 * cache entries; on return, *size is shrunk to the length actually
 * mapped. Returns 0 when the first byte is unmapped or the permission
 * check fails. The caller is expected to hold iotlb_lock for reading,
 * as the list is walked without taking the lock here.
 */
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
                                                uint64_t *size, uint8_t perm)
{
        struct vhost_iotlb_entry *node;
        uint64_t offset, vva = 0, mapped = 0;

        if (unlikely(!*size))
                goto out;

        TAILQ_FOREACH(node, &vq->iotlb_list, next) {
                /* List sorted by iova: a gap means the lookup failed */
                if (unlikely(iova < node->iova))
                        break;

                if (iova >= node->iova + node->size)
                        continue;

                if (unlikely((perm & node->perm) != perm)) {
                        vva = 0;
                        break;
                }

                offset = iova - node->iova;
                if (!vva)
                        vva = node->uaddr + offset;

                mapped += node->size - offset;
                iova = node->iova + node->size;

                if (mapped >= *size)
                        break;
        }

out:
        /* Only part of the requested chunk is mapped */
        if (unlikely(mapped < *size))
                *size = mapped;

        return vva;
}
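
/*
 * Illustrative use (an assumption: the caller holds iotlb_lock for
 * reading, e.g. via vhost_user_iotlb_rd_lock(), as the datapath does;
 * request_iotlb_miss() is a hypothetical placeholder):
 *
 *        uint64_t len = size;
 *        uint64_t vva = vhost_user_iotlb_cache_find(vq, iova, &len,
 *                        VHOST_ACCESS_RW);
 *        if (vva == 0 || len < size)
 *                request_iotlb_miss(vq, iova + len);
 */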

/*
 * (Re)initialize the IOTLB of a virtqueue: reset both lists and
 * (re)create the entry pool, preferably on the NUMA node the virtqueue
 * is allocated on.
 */
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
        char pool_name[RTE_MEMPOOL_NAMESIZE];
        struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
        int socket = 0;

        if (vq->iotlb_pool) {
                /*
                 * The cache has already been initialized,
                 * just drop all cached and pending entries.
                 */
                vhost_user_iotlb_cache_remove_all(vq);
                vhost_user_iotlb_pending_remove_all(vq);
        }

#ifdef RTE_LIBRTE_VHOST_NUMA
        if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
                socket = 0;
#endif

        rte_rwlock_init(&vq->iotlb_lock);
        rte_rwlock_init(&vq->iotlb_pending_lock);

        TAILQ_INIT(&vq->iotlb_list);
        TAILQ_INIT(&vq->iotlb_pending_list);

        snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
                        dev->vid, vq_index);

        /* If already created, free it and recreate */
        vq->iotlb_pool = rte_mempool_lookup(pool_name);
        if (vq->iotlb_pool)
                rte_mempool_free(vq->iotlb_pool);

        vq->iotlb_pool = rte_mempool_create(pool_name,
                        IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
                        0, 0, NULL, NULL, NULL, socket,
                        MEMPOOL_F_NO_CACHE_ALIGN |
                        MEMPOOL_F_SP_PUT |
                        MEMPOOL_F_SC_GET);
        if (!vq->iotlb_pool) {
                RTE_LOG(ERR, VHOST_CONFIG,
                                "Failed to create IOTLB cache pool (%s)\n",
                                pool_name);
                return -1;
        }

        vq->iotlb_cache_nr = 0;

        return 0;
}
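
/*
 * Illustrative call site (an assumption): the IOTLB is initialized once
 * per virtqueue, when the vring gets allocated:
 *
 *        if (vhost_user_iotlb_init(dev, vring_idx) != 0)
 *                return -1;
 */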