Skip to content

Commit 50912bf

Browse files
committed
epoll: Don't iterate all the fds when using epoll
1 parent f1b842f commit 50912bf

File tree

3 files changed

+154
-42
lines changed

3 files changed

+154
-42
lines changed

common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ typedef struct _st_eventsys_ops {
213213
int (*fd_new)(int); /* New descriptor allocated */
214214
int (*fd_close)(int); /* Descriptor closed */
215215
int (*fd_getlimit)(void); /* Descriptor hard limit */
216+
int (*pollq_add)(_st_pollq_t *pq);
217+
void (*pollq_del)(_st_pollq_t *pq);
216218
} _st_eventsys_t;
217219

218220

event.c

Lines changed: 148 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,12 @@ typedef struct _epoll_fd_data {
119119
int wr_ref_cnt;
120120
int ex_ref_cnt;
121121
int revents;
122+
/* The following members aren't touched after forking. */
123+
union {
124+
_st_pollq_t *pq;
125+
_st_pollq_t **pqs;
126+
};
127+
int pq_cnt;
122128
} _epoll_fd_data_t;
123129

124130
static struct _st_epolldata {
@@ -1224,16 +1230,97 @@ ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds)
12241230
return 0;
12251231
}
12261232

1233+
/*
 * Detach a poll queue entry from the per-descriptor bookkeeping.
 *
 * For every descriptor listed in pq->pds, the matching fd_data entry is
 * inspected and the first reference to pq found there is cleared (set to
 * NULL).  The entry's pq_cnt is left untouched and the pqs array is not
 * released; only the slot is emptied.
 *
 * pq_cnt selects which union member is live: exactly 1 means the single
 * inline pointer `pq`, anything larger means the `pqs` array.
 */
ST_HIDDEN void _st_epoll_pollq_del(_st_pollq_t *pq)
{
    struct pollfd *cur;
    struct pollfd *last = pq->pds + pq->npds;

    for (cur = pq->pds; cur < last; cur++) {
        _epoll_fd_data_t *entry = &_st_epoll_data->fd_data[cur->fd];
        int slot;

        /* Nothing has been recorded for this descriptor. */
        if (entry->pq_cnt <= 0)
            continue;

        /* Single-reference form: the pointer is stored inline. */
        if (entry->pq_cnt == 1) {
            if (entry->pq == pq)
                entry->pq = NULL;
            continue;
        }

        /* Array form: locate the first slot holding pq and empty it. */
        slot = 0;
        while (slot < entry->pq_cnt && entry->pqs[slot] != pq)
            slot++;
        if (slot < entry->pq_cnt)
            entry->pqs[slot] = NULL;
    }
}
1256+
1257+
/*
 * Record a poll queue entry against every descriptor it is waiting on.
 *
 * For each descriptor in pq->pds the matching fd_data entry gains a
 * reference to pq.  pq_cnt selects the storage form:
 *   0   - nothing stored yet; pq goes into the inline pointer,
 *   1   - the inline pointer `pq` is live (it may be NULL after a delete),
 *   >=2 - the heap array `pqs` is live; NULL slots are free for reuse.
 *
 * A pq is stored at most once per descriptor.  If the same descriptor is
 * listed more than once in pq->pds, the repeated entries are skipped
 * instead of tripping an assertion or storing the pointer twice.
 *
 * Returns 0 on success.  If memory for the array cannot be obtained, all
 * references to pq made so far are removed via _st_epoll_pollq_del(),
 * errno is set to ENOMEM and -1 is returned.
 */
ST_HIDDEN int _st_epoll_pollq_add(_st_pollq_t *pq)
{
    struct pollfd *pd = pq->pds;
    struct pollfd *pd_end = pd + pq->npds;
    _epoll_fd_data_t *efd;
    _st_pollq_t **pqs;
    int i, free_slot;

    for (; pd < pd_end; ++pd) {
        efd = &_st_epoll_data->fd_data[pd->fd];

        if (efd->pq_cnt == 0) {
            /* First waiter ever recorded for this descriptor. */
            efd->pq = pq;
            efd->pq_cnt = 1;
            continue;
        }

        if (efd->pq_cnt == 1) {
            if (efd->pq == pq)
                continue;           /* already recorded (repeated fd) */
            if (efd->pq == NULL) {
                efd->pq = pq;       /* reuse the vacated inline slot */
                continue;
            }
            /* Second distinct waiter: switch to the array form. */
            pqs = malloc(sizeof(*pqs) * 2);
            if (!pqs)
                goto nomem;
            pqs[0] = efd->pq;
            pqs[1] = pq;
            efd->pqs = pqs;
            efd->pq_cnt = 2;
            continue;
        }

        /*
         * Array form.  Scan the whole array: stop if pq is already there,
         * otherwise remember the first free slot.
         */
        free_slot = -1;
        for (i = 0; i < efd->pq_cnt; ++i) {
            if (efd->pqs[i] == pq)
                break;
            if (efd->pqs[i] == NULL && free_slot < 0)
                free_slot = i;
        }
        if (i < efd->pq_cnt)
            continue;               /* already recorded (repeated fd) */
        if (free_slot >= 0) {
            efd->pqs[free_slot] = pq;
            continue;
        }

        /* No free slot: grow by one.  Keep the old array on failure. */
        pqs = realloc(efd->pqs, sizeof(*pqs) * (efd->pq_cnt + 1));
        if (!pqs)
            goto nomem;
        efd->pqs = pqs;
        efd->pqs[efd->pq_cnt++] = pq;
    }

    return 0;

nomem:
    /* Undo the references made so far; untouched entries are unaffected. */
    _st_epoll_pollq_del(pq);
    errno = ENOMEM;
    return -1;
}
1311+
12271312
ST_HIDDEN void _st_epoll_dispatch(void)
12281313
{
12291314
st_utime_t min_timeout;
12301315
_st_clist_t *q;
12311316
_st_pollq_t *pq;
12321317
struct pollfd *pds, *epds;
12331318
struct epoll_event ev;
1234-
int timeout, nfd, i, osfd, notify;
1319+
int timeout, nfd, i, j, osfd, notify;
12351320
int events, op;
12361321
short revents;
1322+
_epoll_fd_data_t *efd;
1323+
_st_pollq_t **pqs;
12371324

12381325
if (_ST_SLEEPQ == NULL) {
12391326
timeout = -1;
@@ -1255,8 +1342,10 @@ ST_HIDDEN void _st_epoll_dispatch(void)
12551342
_st_epoll_data->pid = getpid();
12561343

12571344
/* Put all descriptors on ioq into new epoll set */
1258-
memset(_st_epoll_data->fd_data, 0,
1259-
_st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t));
1345+
for (i = 0; i < _st_epoll_data->fd_data_size; ++i) {
1346+
memset(&_st_epoll_data->fd_data[i], 0,
1347+
offsetof(_epoll_fd_data_t, pq));
1348+
}
12601349
_st_epoll_data->evtlist_cnt = 0;
12611350
for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
12621351
pq = _ST_POLLQUEUE_PTR(q);
@@ -1278,48 +1367,63 @@ ST_HIDDEN void _st_epoll_dispatch(void)
12781367
}
12791368
}
12801369

1281-
for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
1282-
pq = _ST_POLLQUEUE_PTR(q);
1283-
notify = 0;
1284-
epds = pq->pds + pq->npds;
1285-
1286-
for (pds = pq->pds; pds < epds; pds++) {
1287-
if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
1288-
pds->revents = 0;
1370+
for (i = 0; i < nfd; ++i) {
1371+
osfd = _st_epoll_data->evtlist[i].data.fd;
1372+
efd = &_st_epoll_data->fd_data[osfd];
1373+
assert(efd->pq_cnt > 0);
1374+
if (efd->pq_cnt == 1)
1375+
pqs = &efd->pq;
1376+
else
1377+
pqs = efd->pqs;
1378+
for (j = 0; j < efd->pq_cnt; ++j) {
1379+
pq = pqs[j];
1380+
if (!pq)
12891381
continue;
1290-
}
1291-
osfd = pds->fd;
1292-
events = pds->events;
1293-
revents = 0;
1294-
if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
1295-
revents |= POLLIN;
1296-
if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
1297-
revents |= POLLOUT;
1298-
if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
1299-
revents |= POLLPRI;
1300-
if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
1301-
revents |= POLLERR;
1302-
if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
1303-
revents |= POLLHUP;
1382+
notify = 0;
1383+
epds = pq->pds + pq->npds;
13041384

1305-
pds->revents = revents;
1306-
if (revents) {
1307-
notify = 1;
1385+
for (pds = pq->pds; pds < epds; pds++) {
1386+
if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
1387+
pds->revents = 0;
1388+
continue;
1389+
}
1390+
osfd = pds->fd;
1391+
events = pds->events;
1392+
revents = 0;
1393+
if ((events & POLLIN) &&
1394+
(_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
1395+
revents |= POLLIN;
1396+
if ((events & POLLOUT) &&
1397+
(_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
1398+
revents |= POLLOUT;
1399+
if ((events & POLLPRI) &&
1400+
(_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
1401+
revents |= POLLPRI;
1402+
if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
1403+
revents |= POLLERR;
1404+
if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
1405+
revents |= POLLHUP;
1406+
1407+
pds->revents = revents;
1408+
if (revents) {
1409+
notify = 1;
1410+
}
13081411
}
1309-
}
1310-
if (notify) {
1311-
ST_REMOVE_LINK(&pq->links);
1312-
pq->on_ioq = 0;
1313-
/*
1314-
* Here we will only delete/modify descriptors that
1315-
* didn't fire (see comments in _st_epoll_pollset_del()).
1316-
*/
1317-
_st_epoll_pollset_del(pq->pds, pq->npds);
1412+
if (notify) {
1413+
_st_epoll_pollq_del(pq);
1414+
ST_REMOVE_LINK(&pq->links);
1415+
pq->on_ioq = 0;
1416+
/*
1417+
* Here we will only delete/modify descriptors that
1418+
* didn't fire (see comments in _st_epoll_pollset_del()).
1419+
*/
1420+
_st_epoll_pollset_del(pq->pds, pq->npds);
13181421

1319-
if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
1320-
_ST_DEL_SLEEPQ(pq->thread);
1321-
pq->thread->state = _ST_ST_RUNNABLE;
1322-
_ST_ADD_RUNQ(pq->thread);
1422+
if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
1423+
_ST_DEL_SLEEPQ(pq->thread);
1424+
pq->thread->state = _ST_ST_RUNNABLE;
1425+
_ST_ADD_RUNQ(pq->thread);
1426+
}
13231427
}
13241428
}
13251429

@@ -1389,7 +1493,9 @@ static _st_eventsys_t _st_epoll_eventsys = {
13891493
_st_epoll_pollset_del,
13901494
_st_epoll_fd_new,
13911495
_st_epoll_fd_close,
1392-
_st_epoll_fd_getlimit
1496+
_st_epoll_fd_getlimit,
1497+
_st_epoll_pollq_add,
1498+
_st_epoll_pollq_del
13931499
};
13941500
#endif /* MD_HAVE_EPOLL */
13951501

sched.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ int st_poll(struct pollfd *pds, int npds, st_utime_t timeout)
7878
pq.npds = npds;
7979
pq.thread = me;
8080
pq.on_ioq = 1;
81+
if (*_st_eventsys->pollq_add && (*_st_eventsys->pollq_add)(&pq))
82+
return -1;
8183
_ST_ADD_IOQ(pq);
8284
if (timeout != ST_UTIME_NO_TIMEOUT)
8385
_ST_ADD_SLEEPQ(me, timeout);
@@ -87,6 +89,8 @@ int st_poll(struct pollfd *pds, int npds, st_utime_t timeout)
8789

8890
n = 0;
8991
if (pq.on_ioq) {
92+
if (*_st_eventsys->pollq_del)
93+
(*_st_eventsys->pollq_del)(&pq);
9094
/* If we timed out, the pollq might still be on the ioq. Remove it */
9195
_ST_DEL_IOQ(pq);
9296
(*_st_eventsys->pollset_del)(pds, npds);

0 commit comments

Comments
 (0)