From 0cdd34c1e094438d7b5deae1c13f6f54c83032b7 Mon Sep 17 00:00:00 2001 From: sauwming Date: Mon, 6 Jun 2022 11:41:35 +0800 Subject: [PATCH] Patch to improve epoll implementation (#3121) --- pjlib/include/pj/config.h | 9 ++++ pjlib/src/pj/ioqueue_epoll.c | 100 +++++++++++++++++++++++------------ 2 files changed, 76 insertions(+), 33 deletions(-) diff --git a/pjlib/include/pj/config.h b/pjlib/include/pj/config.h index 37d2b9023..a208dfb75 100644 --- a/pjlib/include/pj/config.h +++ b/pjlib/include/pj/config.h @@ -744,6 +744,15 @@ #endif +/* Setting to determine whether to enable epoll exclusive/oneshot feature. + * + * Default: 1 (enabled) + */ +#ifndef PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT +# define PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT 1 +#endif + + /** * Determine if FD_SETSIZE is changeable/set-able. If so, then we will * set it to PJ_IOQUEUE_MAX_HANDLES. Currently we detect this by checking diff --git a/pjlib/src/pj/ioqueue_epoll.c b/pjlib/src/pj/ioqueue_epoll.c index a18d284c3..4fe20f7a6 100644 --- a/pjlib/src/pj/ioqueue_epoll.c +++ b/pjlib/src/pj/ioqueue_epoll.c @@ -119,16 +119,17 @@ static void scan_closing_keys(pj_ioqueue_t *ioqueue); # include # if OPENSSL_VERSION_NUMBER < 0x10100000L -# define DONT_USE_EXCL_ONESHOT +# undef PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT +# define PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT 0 # endif #endif /* Use EPOLLEXCLUSIVE or EPOLLONESHOT to signal one thread only at a time. */ -#if defined(EPOLLEXCLUSIVE) && !defined(DONT_USE_EXCL_ONESHOT) +#if defined(EPOLLEXCLUSIVE) && PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT # define USE_EPOLLEXCLUSIVE 1 # define USE_EPOLLONESHOT 0 -#elif defined(EPOLLONESHOT) && !defined(DONT_USE_EXCL_ONESHOT) +#elif defined(EPOLLONESHOT) && PJ_IOQUEUE_EPOLL_ENABLE_EXCLUSIVE_ONESHOT # define USE_EPOLLEXCLUSIVE 0 # define USE_EPOLLONESHOT 1 #else @@ -493,10 +494,17 @@ PJ_DEF(pj_status_t) pj_ioqueue_unregister( pj_ioqueue_key_t *key) ev.epoll_data = (epoll_data_type)key; status = os_epoll_ctl( ioqueue->epfd, EPOLL_CTL_DEL, key->fd, &ev); if (status != 0) { - pj_status_t rc = pj_get_os_error(); - pj_lock_release(ioqueue->lock); - pj_ioqueue_unlock_key(key); - return rc; + // pj_status_t rc = pj_get_os_error(); + TRACE_((THIS_FILE, + "pj_ioqueue_unregister error: os_epoll_ctl rc=%d", + pj_get_os_error())); + /* From epoll doc: "Closing a file descriptor cause it to be + * removed from all epoll interest lists". So we should just + * proceed instead of returning failure here. + */ + // pj_lock_release(ioqueue->lock); + // pj_ioqueue_unlock_key(key); + // return rc; } /* Destroy the key. */ @@ -578,20 +586,11 @@ static void ioqueue_remove_from_set( pj_ioqueue_t *ioqueue, pj_ioqueue_key_t *key, enum ioqueue_event_type event_type) { -#if USE_EPOLLONESHOT - /* For EPOLLONESHOT, always rearm ioqueue for events. */ - PJ_UNUSED_ARG(event_type); - pj_uint32_t ev = EPOLLIN | EPOLLERR; - if (key_has_pending_write(key)) - ev |= EPOLLOUT; - update_epoll_event_set(ioqueue, key, ev); -#else /* Remove EPOLLOUT if write event received and no pending send */ if (event_type == WRITEABLE_EVENT && !key_has_pending_write(key)) { pj_uint32_t ev = EPOLLIN | EPOLLERR; update_epoll_event_set(ioqueue, key, ev); } -#endif } /* @@ -604,6 +603,14 @@ static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue, pj_ioqueue_key_t *key, enum ioqueue_event_type event_type ) { +#if USE_EPOLLONESHOT + /* For EPOLLONESHOT, always rearm ioqueue for events. */ + PJ_UNUSED_ARG(event_type); + pj_uint32_t ev = EPOLLIN | EPOLLERR; + if (key_has_pending_connect(key) || key_has_pending_write(key)) + ev |= EPOLLOUT; + update_epoll_event_set(ioqueue, key, ev); +#else /* Add EPOLLOUT if write-event requested (other events are always set) */ if (event_type == WRITEABLE_EVENT) { pj_uint32_t ev = EPOLLIN | EPOLLERR; @@ -612,6 +619,7 @@ static void ioqueue_add_to_set( pj_ioqueue_t *ioqueue, update_epoll_event_set(ioqueue, key, ev); } +#endif } #if PJ_IOQUEUE_HAS_SAFE_UNREG @@ -692,7 +700,6 @@ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) pj_lock_acquire(ioqueue->lock); for (event_cnt=0, i=0; igrp_lock) @@ -818,11 +816,6 @@ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) } if (event_done) { ++processed_cnt; - } else { - pj_ioqueue_lock_key(queue[i].key); - ioqueue_remove_from_set(ioqueue, queue[i].key, - queue[i].event_type); - pj_ioqueue_unlock_key(queue[i].key); } } @@ -835,11 +828,52 @@ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) "ioqueue", 0); } +#if USE_EPOLLONESHOT + /* When using EPOLLONESHOT, we will receive one-shot notification for + * the associated file descriptor, after which the file descriptor + * is disabled in the interest list and no other events will be + * reported. + * Note the following cases can happen: + * - we do not want to process a reported event (i.e. event_cnt < count) + * - we process the event, but the processing doesn't rearm the file + * descriptor (such as during processing failure) + * So we need to make sure to rearm the file descriptor here with + * a new event mask. + */ + for (i=0; i 0 but no descriptors are actually set! + * When epoll returns > 0 but event_cnt, the number of events + * we want to process, is zero. + * There are several possibilities this can happen: + * - Multiple polling threads can receive the same event + * (if not EXCLUSIVE nor ONESHOT), and only one thread will + * process it. + * - Events EPOLLIN is always on, while EPOLLERR and EPOLLHUP + * will always be automatically reported even though we + * never specifically asked for them. */ if (count > 0 && !event_cnt && msec > 0) { - pj_thread_sleep(msec); +#if !USE_EPOLLEXCLUSIVE && !USE_EPOLLONESHOT + /* We need to sleep in order to avoid busy polling, such + * as in the case of the thread that doesn't process + * the event as explained above. + * Note that the sleep period should be reduced by + * the amount of time already used for epoll_wait(). + */ + int delay = msec - pj_elapsed_usec(&t1, &t2)/1000; + if (delay > 0) + pj_thread_sleep(delay); +#endif } TRACE_((THIS_FILE, " poll: count=%d events=%d processed=%d",