New upstream version 18.08
[deb_dpdk.git] / lib / librte_vhost / fd_man.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <sys/socket.h>
9 #include <sys/time.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12 #include <string.h>
13
14 #include <rte_common.h>
15 #include <rte_log.h>
16
17 #include "fd_man.h"
18
19
/* Log type named VHOST_FDMAN in this file's RTE_LOG() calls; aliased to USER1. */
#define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1

/*
 * poll() revents bits treated as an error condition. The dispatcher invokes
 * both the read and the write callback when any of these is set.
 */
#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
23
24 static int
25 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
26 {
27         int i;
28
29         for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
30                 ;
31
32         return i;
33 }
34
35 static void
36 fdset_move(struct fdset *pfdset, int dst, int src)
37 {
38         pfdset->fd[dst]    = pfdset->fd[src];
39         pfdset->rwfds[dst] = pfdset->rwfds[src];
40 }
41
42 static void
43 fdset_shrink_nolock(struct fdset *pfdset)
44 {
45         int i;
46         int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
47
48         for (i = 0; i < last_valid_idx; i++) {
49                 if (pfdset->fd[i].fd != -1)
50                         continue;
51
52                 fdset_move(pfdset, i, last_valid_idx);
53                 last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
54         }
55         pfdset->num = last_valid_idx + 1;
56 }
57
58 /*
59  * Find deleted fd entries and remove them
60  */
61 static void
62 fdset_shrink(struct fdset *pfdset)
63 {
64         pthread_mutex_lock(&pfdset->fd_mutex);
65         fdset_shrink_nolock(pfdset);
66         pthread_mutex_unlock(&pfdset->fd_mutex);
67 }
68
69 /**
70  * Returns the index in the fdset for a given fd.
71  * @return
72  *   index for the fd, or -1 if fd isn't in the fdset.
73  */
74 static int
75 fdset_find_fd(struct fdset *pfdset, int fd)
76 {
77         int i;
78
79         for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
80                 ;
81
82         return i == pfdset->num ? -1 : i;
83 }
84
85 static void
86 fdset_add_fd(struct fdset *pfdset, int idx, int fd,
87         fd_cb rcb, fd_cb wcb, void *dat)
88 {
89         struct fdentry *pfdentry = &pfdset->fd[idx];
90         struct pollfd *pfd = &pfdset->rwfds[idx];
91
92         pfdentry->fd  = fd;
93         pfdentry->rcb = rcb;
94         pfdentry->wcb = wcb;
95         pfdentry->dat = dat;
96
97         pfd->fd = fd;
98         pfd->events  = rcb ? POLLIN : 0;
99         pfd->events |= wcb ? POLLOUT : 0;
100         pfd->revents = 0;
101 }
102
103 void
104 fdset_init(struct fdset *pfdset)
105 {
106         int i;
107
108         if (pfdset == NULL)
109                 return;
110
111         for (i = 0; i < MAX_FDS; i++) {
112                 pfdset->fd[i].fd = -1;
113                 pfdset->fd[i].dat = NULL;
114         }
115         pfdset->num = 0;
116 }
117
118 /**
119  * Register the fd in the fdset with read/write handler and context.
120  */
121 int
122 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
123 {
124         int i;
125
126         if (pfdset == NULL || fd == -1)
127                 return -1;
128
129         pthread_mutex_lock(&pfdset->fd_mutex);
130         i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
131         if (i == -1) {
132                 fdset_shrink_nolock(pfdset);
133                 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
134                 if (i == -1) {
135                         pthread_mutex_unlock(&pfdset->fd_mutex);
136                         return -2;
137                 }
138         }
139
140         fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
141         pthread_mutex_unlock(&pfdset->fd_mutex);
142
143         return 0;
144 }
145
146 /**
147  *  Unregister the fd from the fdset.
148  *  Returns context of a given fd or NULL.
149  */
150 void *
151 fdset_del(struct fdset *pfdset, int fd)
152 {
153         int i;
154         void *dat = NULL;
155
156         if (pfdset == NULL || fd == -1)
157                 return NULL;
158
159         do {
160                 pthread_mutex_lock(&pfdset->fd_mutex);
161
162                 i = fdset_find_fd(pfdset, fd);
163                 if (i != -1 && pfdset->fd[i].busy == 0) {
164                         /* busy indicates r/wcb is executing! */
165                         dat = pfdset->fd[i].dat;
166                         pfdset->fd[i].fd = -1;
167                         pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
168                         pfdset->fd[i].dat = NULL;
169                         i = -1;
170                 }
171                 pthread_mutex_unlock(&pfdset->fd_mutex);
172         } while (i != -1);
173
174         return dat;
175 }
176
177 /**
178  *  Unregister the fd from the fdset.
179  *
180  *  If parameters are invalid, return directly -2.
181  *  And check whether fd is busy, if yes, return -1.
182  *  Otherwise, try to delete the fd from fdset and
183  *  return true.
184  */
185 int
186 fdset_try_del(struct fdset *pfdset, int fd)
187 {
188         int i;
189
190         if (pfdset == NULL || fd == -1)
191                 return -2;
192
193         pthread_mutex_lock(&pfdset->fd_mutex);
194         i = fdset_find_fd(pfdset, fd);
195         if (i != -1 && pfdset->fd[i].busy) {
196                 pthread_mutex_unlock(&pfdset->fd_mutex);
197                 return -1;
198         }
199
200         if (i != -1) {
201                 pfdset->fd[i].fd = -1;
202                 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
203                 pfdset->fd[i].dat = NULL;
204         }
205
206         pthread_mutex_unlock(&pfdset->fd_mutex);
207         return 0;
208 }
209
210 /**
211  * This functions runs in infinite blocking loop until there is no fd in
212  * pfdset. It calls corresponding r/w handler if there is event on the fd.
213  *
214  * Before the callback is called, we set the flag to busy status; If other
215  * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
216  * will wait until the flag is reset to zero(which indicates the callback is
217  * finished), then it could free the context after fdset_del.
218  */
219 void *
220 fdset_event_dispatch(void *arg)
221 {
222         int i;
223         struct pollfd *pfd;
224         struct fdentry *pfdentry;
225         fd_cb rcb, wcb;
226         void *dat;
227         int fd, numfds;
228         int remove1, remove2;
229         int need_shrink;
230         struct fdset *pfdset = arg;
231         int val;
232
233         if (pfdset == NULL)
234                 return NULL;
235
236         while (1) {
237
238                 /*
239                  * When poll is blocked, other threads might unregister
240                  * listenfds from and register new listenfds into fdset.
241                  * When poll returns, the entries for listenfds in the fdset
242                  * might have been updated. It is ok if there is unwanted call
243                  * for new listenfds.
244                  */
245                 pthread_mutex_lock(&pfdset->fd_mutex);
246                 numfds = pfdset->num;
247                 pthread_mutex_unlock(&pfdset->fd_mutex);
248
249                 val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
250                 if (val < 0)
251                         continue;
252
253                 need_shrink = 0;
254                 for (i = 0; i < numfds; i++) {
255                         pthread_mutex_lock(&pfdset->fd_mutex);
256
257                         pfdentry = &pfdset->fd[i];
258                         fd = pfdentry->fd;
259                         pfd = &pfdset->rwfds[i];
260
261                         if (fd < 0) {
262                                 need_shrink = 1;
263                                 pthread_mutex_unlock(&pfdset->fd_mutex);
264                                 continue;
265                         }
266
267                         if (!pfd->revents) {
268                                 pthread_mutex_unlock(&pfdset->fd_mutex);
269                                 continue;
270                         }
271
272                         remove1 = remove2 = 0;
273
274                         rcb = pfdentry->rcb;
275                         wcb = pfdentry->wcb;
276                         dat = pfdentry->dat;
277                         pfdentry->busy = 1;
278
279                         pthread_mutex_unlock(&pfdset->fd_mutex);
280
281                         if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
282                                 rcb(fd, dat, &remove1);
283                         if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
284                                 wcb(fd, dat, &remove2);
285                         pfdentry->busy = 0;
286                         /*
287                          * fdset_del needs to check busy flag.
288                          * We don't allow fdset_del to be called in callback
289                          * directly.
290                          */
291                         /*
292                          * When we are to clean up the fd from fdset,
293                          * because the fd is closed in the cb,
294                          * the old fd val could be reused by when creates new
295                          * listen fd in another thread, we couldn't call
296                          * fdset_del.
297                          */
298                         if (remove1 || remove2) {
299                                 pfdentry->fd = -1;
300                                 need_shrink = 1;
301                         }
302                 }
303
304                 if (need_shrink)
305                         fdset_shrink(pfdset);
306         }
307
308         return NULL;
309 }
310
311 static void
312 fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
313                    int *remove __rte_unused)
314 {
315         char charbuf[16];
316         int r = read(readfd, charbuf, sizeof(charbuf));
317         /*
318          * Just an optimization, we don't care if read() failed
319          * so ignore explicitly its return value to make the
320          * compiler happy
321          */
322         RTE_SET_USED(r);
323 }
324
325 void
326 fdset_pipe_uninit(struct fdset *fdset)
327 {
328         fdset_del(fdset, fdset->u.readfd);
329         close(fdset->u.readfd);
330         close(fdset->u.writefd);
331 }
332
333 int
334 fdset_pipe_init(struct fdset *fdset)
335 {
336         int ret;
337
338         if (pipe(fdset->u.pipefd) < 0) {
339                 RTE_LOG(ERR, VHOST_FDMAN,
340                         "failed to create pipe for vhost fdset\n");
341                 return -1;
342         }
343
344         ret = fdset_add(fdset, fdset->u.readfd,
345                         fdset_pipe_read_cb, NULL, NULL);
346
347         if (ret < 0) {
348                 RTE_LOG(ERR, VHOST_FDMAN,
349                         "failed to add pipe readfd %d into vhost server fdset\n",
350                         fdset->u.readfd);
351
352                 fdset_pipe_uninit(fdset);
353                 return -1;
354         }
355
356         return 0;
357 }
358
359 void
360 fdset_pipe_notify(struct fdset *fdset)
361 {
362         int r = write(fdset->u.writefd, "1", 1);
363         /*
364          * Just an optimization, we don't care if write() failed
365          * so ignore explicitly its return value to make the
366          * compiler happy
367          */
368         RTE_SET_USED(r);
369
370 }