From 7354201b5ddd420a230bb4910d3c3d859a5a46cc Mon Sep 17 00:00:00 2001
From: Richard Harrison
Date: Mon, 13 May 2019 15:21:08 +0200
Subject: [PATCH] Added background (threaded) garbage collector

---
 simgear/nasal/CMakeLists.txt                  |   1 +
 simgear/nasal/ThreadedGarbageCollector.cpp    | 197 ++++++++++++
 simgear/nasal/code.c                          |  46 +--
 .../nasal/cppbind/detail/to_nasal_helper.cxx  | 118 ++++++++
 simgear/nasal/gc.c                            | 263 +++++++++++++++---
 simgear/nasal/hash.c                          |  11 +
 simgear/nasal/misc.c                          |   2 +-
 7 files changed, 575 insertions(+), 63 deletions(-)
 create mode 100644 simgear/nasal/ThreadedGarbageCollector.cpp

diff --git a/simgear/nasal/CMakeLists.txt b/simgear/nasal/CMakeLists.txt
index 8f4d2c90..e5881450 100644
--- a/simgear/nasal/CMakeLists.txt
+++ b/simgear/nasal/CMakeLists.txt
@@ -28,6 +28,7 @@ set(SOURCES
     code.h
     data.h
     parse.h
+    ThreadedGarbageCollector.cpp
     )
 
 simgear_component(nasal nasal "${SOURCES}" "${HEADERS}")
diff --git a/simgear/nasal/ThreadedGarbageCollector.cpp b/simgear/nasal/ThreadedGarbageCollector.cpp
new file mode 100644
index 00000000..22133b0c
--- /dev/null
+++ b/simgear/nasal/ThreadedGarbageCollector.cpp
@@ -0,0 +1,197 @@
+//#include "nasal.h"
+//#include "data.h"
+//#include "code.h"
+
+#include <simgear/threads/SGThread.hxx>
+#include <simgear/debug/logstream.hxx>
+#include <simgear/timing/timestamp.hxx>
+#include <mutex>
+#include <condition_variable>
+#include <atomic>
+extern "C" {
+    extern int __bg_gc;
+    extern int GCglobalAlloc();
+    extern int naGarbageCollect();
+}
+
+// Worker thread that executes process() once for each release() call.
+// release()/wait() hand work to the worker through mutex_ + condVar;
+// setCompletion()/awaitCompletion() report it back through Cmutex_ + CcondVar.
+class SGExclusiveThread : public SGThread
+{
+private:
+    std::mutex mutex_;
+    std::condition_variable condVar;
+    SGTimeStamp timestamp;
+    std::mutex Cmutex_;
+    std::condition_variable CcondVar;
+
+    // Both flags are touched by the worker and by its controller, so they
+    // are atomic rather than plain bool.
+    std::atomic<bool> _started;
+    std::atomic<bool> _terminated;
+    int last_await_time;
+
+    std::atomic<bool> dataReady;
+    std::atomic<bool> complete;
+    std::atomic<bool> process_ran;
+    std::atomic<bool> process_running;
+
+public:
+    SGExclusiveThread() :
+        _started(false), _terminated(false), last_await_time(0),
+        dataReady(false), complete(true), process_ran(false), process_running(false)
+    {
+    }
+
+    virtual ~SGExclusiveThread()
+    {
+
+    }
+
+    // Ask the worker for one process() run. Logs and returns without
+    // queueing anything while the previous run has not completed.
+    void release() {
+        std::unique_lock<std::mutex> lck(mutex_);
+        if (!complete) {
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] not finished - skipping");
+            return;
+        }
+        if (!complete.exchange(false))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (2)");
+        if (dataReady.exchange(true))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (1)");
+        condVar.notify_one();
+    }
+
+    // Block until work is queued or terminate() was called. The predicate
+    // overload re-checks after every wake-up, spurious ones included.
+    void wait() {
+        std::unique_lock<std::mutex> lck(mutex_);
+        condVar.wait(lck, [this] {
+            return dataReady.load() || _terminated.load();
+        });
+    }
+    void clearAwaitCompletionTime() {
+        last_await_time = 0;
+    }
+
+    // Block until the queued run has finished; when a run took place, record
+    // and print how long the caller was kept waiting.
+    virtual void awaitCompletion() {
+        timestamp.stamp();
+        std::unique_lock<std::mutex> lck(Cmutex_);
+        if (!complete)
+        {
+            do {
+                CcondVar.wait(lck);
+            } while (!complete.load());
+        }
+
+        if (process_ran) {
+            last_await_time = timestamp.elapsedUSec();
+            printf("await %5.1f ", last_await_time / 1000.0);
+            process_ran = 0;
+        }
+    }
+
+    // Worker side: clear the request, flag completion, wake awaitCompletion().
+    void setCompletion() {
+        std::unique_lock<std::mutex> lck(Cmutex_);
+        if (!dataReady.exchange(false))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on dataReady failed (5)\n");
+
+        if (complete.exchange(true))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on complete failed (5)\n");
+        CcondVar.notify_one();
+    }
+    virtual int process() = 0;
+
+    // Thread body: serve requests until terminate() is seen, then reset the
+    // bookkeeping flags.
+    virtual void run()
+    {
+        process_running = true;
+        while (!_terminated) {
+            wait();
+            if (!dataReady)
+                break; // woken by terminate() with nothing queued
+            process_ran = process();
+            setCompletion();
+        }
+        process_running = false;
+        _terminated = false;
+        _started = false;
+    }
+
+    // Request shutdown. The flag is set under mutex_ and the worker is
+    // notified, so a worker parked in wait() leaves promptly instead of
+    // sleeping until the next release().
+    void terminate() {
+        std::unique_lock<std::mutex> lck(mutex_);
+        _terminated = true;
+        condVar.notify_one();
+    }
+    bool stop()
+    {
+        return true;
+    }
+    void ensure_running()
+    {
+        if (!_started)
+        {
+            _started = true;
+            start();
+        }
+    }
+    bool is_running()
+    {
+        return process_running;
+    }
+
+};
+
+// Background collector: each run performs one naGarbageCollect() pass.
+class ThreadedGarbageCollector : public SGExclusiveThread
+{
+public:
+    ThreadedGarbageCollector() : SGExclusiveThread()
+    {
+    }
+    virtual ~ThreadedGarbageCollector()
+    {
+
+    }
+
+    virtual int process()
+    {
+        return naGarbageCollect();
+    }
+};
+
+ThreadedGarbageCollector gct;
+extern"C" {
+    void startNasalBackgroundGarbageCollection() // starts the gct worker thread unless it was already started
+    {
+        gct.ensure_running();
+    }
+    void stopNasalBackgroundGarbageCollection() // asks the gct worker loop to exit
+    {
+        gct.terminate();
+    }
+    void performNasalBackgroundGarbageCollection() // queues one collection run, only while the worker is running
+    {
+        if (gct.is_running())
+            gct.release();
+    }
+    void awaitNasalGarbageCollectionComplete(bool can_wait) // can_wait: block until the queued run is done; otherwise just reset the recorded wait time
+    {
+        if (gct.is_running())
+        {
+            if (can_wait)
+                gct.awaitCompletion();
+            else
+                gct.clearAwaitCompletionTime();
+        }
+    }
+}
diff --git a/simgear/nasal/code.c b/simgear/nasal/code.c
index 740b7058..b7d3674b 100644
--- a/simgear/nasal/code.c
+++ b/simgear/nasal/code.c
@@ -24,21 +24,21 @@ struct Globals* globals = 0;
 
 static naRef bindFunction(naContext ctx, struct Frame* f, naRef code);
 
-char __name[3000] = { 0 };
-int init = 0;
-void getSource(struct Context* c) {
-    naRef v = naGetSourceFile(c, 0);
-    init = 1;
-    if (!IS_NIL(v))
-        snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
-    else
-        *__name = 0;
-}
-char *getName() {
-    if (init)
-        return __name;
-    return "**";
-}
+//char __name[3000] = { 0 };
+//int init = 0;
+//void getSource(struct Context* c) {
+//    naRef v = naGetSourceFile(c, 0);
+//    init = 1;
+//    if (!IS_NIL(v))
+//        snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
+//    else
+//        *__name = 0;
+//}
+//char *getName() {
+//    if (init)
+//        return __name;
+//    return "**";
+//}
 #define ERR(c, msg) naRuntimeError((c),(msg))
 void naRuntimeError(naContext c, const char* fmt, ...)
{ @@ -172,7 +172,7 @@ static void initContext(naContext c) c->error[0] = 0; c->userData = 0; } - +#define BASE_SIZE 256000 static void initGlobals() { int i; @@ -183,10 +183,10 @@ static void initGlobals() globals->sem = naNewSem(); globals->lock = naNewLock(); - globals->allocCount = 256; // reasonable starting value + globals->allocCount = BASE_SIZE; // reasonable starting value for(i=0; ipools[i]), i); - globals->deadsz = 256; + globals->deadsz = BASE_SIZE; globals->ndead = 0; globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz); @@ -320,7 +320,7 @@ static void checkNamedArgs(naContext ctx, struct naCode* c, struct naHash* h) static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named) { - getSource(ctx); + //getSource(ctx); naRef *args, func, code, obj = naNil(); struct Frame* f; int opf = ctx->opTop - nargs; @@ -351,8 +351,9 @@ static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named f->ip = 0; f->bp = ctx->opFrame; - if(mcall) naHash_set(f->locals, globals->meRef, obj); - + if (mcall) { + naHash_set(f->locals, globals->meRef, obj); + } if(named) checkNamedArgs(ctx, PTR(code).code, PTR(f->locals).hash); else setupArgs(ctx, f, args, nargs); @@ -921,8 +922,9 @@ naRef naCall(naContext ctx, naRef func, int argc, naRef* args, func = naNewFunc(ctx, func); PTR(func).func->namespace = locals; } - if(!IS_NIL(obj)) + if (!IS_NIL(obj)) { naHash_set(locals, globals->meRef, obj); + } ctx->opTop = ctx->markTop = 0; ctx->fTop = 1; diff --git a/simgear/nasal/cppbind/detail/to_nasal_helper.cxx b/simgear/nasal/cppbind/detail/to_nasal_helper.cxx index 752a7d5a..e2a04270 100644 --- a/simgear/nasal/cppbind/detail/to_nasal_helper.cxx +++ b/simgear/nasal/cppbind/detail/to_nasal_helper.cxx @@ -25,6 +25,12 @@ #include +#include +#include +#include +#include + + namespace nasal { //---------------------------------------------------------------------------- @@ -123,4 +129,116 @@ namespace nasal ); } + template class FastStack + { + 
public: + T* st; + int allocationSize; + int lastIndex; + //std::mutex mutex_; + + public: + FastStack(int stackSize); + ~FastStack(); + + inline void resize(int newSize); + inline void push(T x); + inline void pop(); + inline void clear(); + inline void iterate(int(*process)(naRef v)); + inline size_t size() { + return lastIndex + 1; + } + T top() + { + //std::unique_lock lck(mutex_); + return st[lastIndex]; + } + void push_if_not_present(naRef r); + }; + + template + FastStack::FastStack(int stackSize) + { + st = NULL; + this->allocationSize = stackSize; + st = (T*)malloc(stackSize * sizeof(naRef)); + lastIndex = -1; + } + template + FastStack::~FastStack() + { + delete[] st; + } + + template + void FastStack::clear() + { + lastIndex = -1; + } + + template + void FastStack::push_if_not_present(naRef r) { + /*for (int i = 0; i <= lastIndex; i++) + if (st[i] == r) + return;*/ + push(r); + } + template + void FastStack::iterate(int(*process)(naRef v)) + { + for (int i = 0; i <= lastIndex; i++) + if (process(st[i])) + break; + } + + template + void FastStack::pop() + { + --lastIndex; + } + + template + void FastStack::push(T x) + { + if (++lastIndex >= allocationSize) + resize(allocationSize * 2); + st[lastIndex] = x; + } + + template + void FastStack::resize(int newSize) + { + //std::unique_lock lck(mutex_); + T* new_st = (T*)realloc(st, newSize * sizeof(naRef)); + if (new_st) + { + st = new_st; + allocationSize = newSize; + SG_LOG(SG_NASAL, SG_WARN, "Increased tc stack to " << allocationSize); + } + else + throw "Failed to grow tc stack"; + } + FastStack < naRef> t_stack(40); + extern"C" { + + + int __stack_hwm = 0; + void na_t_stack_push(naRef v) { + t_stack.push(v); + + if (t_stack.size() > __stack_hwm) + __stack_hwm = t_stack.size(); + } + extern int na_t_stack_count() { + return t_stack.size(); + } + extern naRef na_t_stack_pop() + { + naRef v = t_stack.top(); + t_stack.pop(); + return v; + } + } } // namespace nasal diff --git a/simgear/nasal/gc.c 
b/simgear/nasal/gc.c
index 7c0a9183..d62ceab4 100644
--- a/simgear/nasal/gc.c
+++ b/simgear/nasal/gc.c
@@ -5,6 +5,8 @@
 
 static void reap(struct naPool* p);
 static void mark(naRef r);
+static void process_all(naRef r, int(*process)(naRef r));
+
 
 struct Block {
     int size;
@@ -14,14 +16,15 @@ struct Block {
 // Must be called with the giant exclusive lock!
 extern void global_stamp();
 extern int global_elapsedUSec();
-extern char *getName();
-static void freeDead()
+int nasal_gc_old = 0; // non-zero makes mark() use the recursive oldmark() instead of process_all()
+
+static int freeDead() // returns how many dead blocks were released
 {
     int i;
     for(i=0; indead; i++)
         naFree(globals->deadBlocks[i]);
     globals->ndead = 0;
-    printf("--> freedead (%d) : %d", i, global_elapsedUSec());
+    return i;
 }
 
 static void marktemps(struct Context* c)
@@ -34,54 +37,105 @@ static void marktemps(struct Context* c)
     }
 }
 
+int __elements_visited = 0; // counter bumped as references are pushed for marking; used for the timing printouts below
+extern int __stack_hwm;
+int busy=0; // set while garbageCollect() runs; a call made meanwhile returns at once
 // Must be called with the big lock!
 static void garbageCollect()
 {
+    if (busy)
+        return;
+    busy = 1;
     int i;
     struct Context* c;
     globals->allocCount = 0;
     c = globals->allContexts;
-    while(c) {
-        for(i=0; iallContexts;
+    while (c) { // NOTE(review): text just above this line was lost in this copy of the patch; the declarations of ctxc, st, et, stel and eel are not visible - confirm against the original commit
+        ctxc++;
+        for (i = 0; i < NUM_NASAL_TYPES; i++)
             c->nfree[i] = 0;
-        for(i=0; i < c->fTop; i++) {
+        for (i = 0; i < c->fTop; i++) {
             mark(c->fStack[i].func);
             mark(c->fStack[i].locals);
         }
-        for(i=0; i < c->opTop; i++)
+        for (i = 0; i < c->opTop; i++)
             mark(c->opStack[i]);
         mark(c->dieArg);
         marktemps(c);
         c = c->nextAll;
     }
-    printf("--> garbageCollect: %d ", global_elapsedUSec());
+    et = global_elapsedUSec() - st; // et: time spent in the step just finished
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited; // eel: elements visited by that step
+    printf("--> garbageCollect(#e%-5d): %-4d ", eel, et);
     mark(globals->save);
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("s(%5d) %-5d ", eel, et);
+
     mark(globals->save_hash);
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("h(%5d) %-5d ", eel, et);
+
+
     mark(globals->symbols);
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("sy(%5d) %-4d ", eel, et);
+
     mark(globals->meRef);
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("me(%5d) %-5d ", eel, et);
+
     mark(globals->argRef);
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("ar(%5d) %-5d ", eel, et);
+
     mark(globals->parentsRef);
-
-    printf("m> %d", global_elapsedUSec());
-
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf(" ev[%3d] %-5d", eel, et);
     // Finally collect all the freed objects
     for (i = 0; i < NUM_NASAL_TYPES; i++) {
         reap(&(globals->pools[i]));
-        printf(" p(%d)> %d", i, global_elapsedUSec());
     }
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    printf(" >> reap %-5d", et);
     // Make enough space for the dead blocks we need to free during
     // execution.  This works out to 1 spot for every 2 live objects,
     // which should be limit the number of bottleneck operations
     // without imposing an undue burden of extra "freeable" memory.
     if(globals->deadsz < globals->allocCount) {
         globals->deadsz = globals->allocCount;
-        if(globals->deadsz < 256) globals->deadsz = 256;
+        if(globals->deadsz < 256000) globals->deadsz = 256000; // same value as BASE_SIZE in code.c
         naFree(globals->deadBlocks);
         globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
     }
     globals->needGC = 0;
-    printf(">> %d ", global_elapsedUSec());
+    et = global_elapsedUSec() - st;
+    st = global_elapsedUSec();
+    printf(">> %-5d ", et);
+    busy = 0;
 }
 
 void naModLock()
@@ -119,16 +173,33 @@ static void bottleneck()
         UNLOCK(); naSemDown(g->sem); LOCK();
         g->waitCount--;
     }
-    printf("bottleneck wait finished %d usec", global_elapsedUSec());
+    printf("GC: wait %2d ", global_elapsedUSec());
     if(g->waitCount >= g->nThreads - 1) {
-        freeDead();
-        //if(g->needGC)
+        int fd = freeDead();
+        printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec());
+        if(g->needGC) // collect only when a collection was requested
             garbageCollect();
         if(g->waitCount) naSemUp(g->sem, g->waitCount);
         g->bottleneck = 0;
     }
-    char *c = getName();
-    printf("bottleneck finished: %d %s\n", global_elapsedUSec(), c);
+    printf(" :: finished: %5d\n", global_elapsedUSec());
+}
+
+static void bottleneckFreeDead() // same rendezvous as bottleneck(), but only frees the dead blocks; never calls garbageCollect()
+{
+    global_stamp();
+    struct Globals* g = globals;
+    g->bottleneck = 1;
+    while (g->bottleneck && g->waitCount < g->nThreads - 1) {
+        g->waitCount++;
+        UNLOCK(); naSemDown(g->sem); LOCK();
+        g->waitCount--;
+    }
+    if (g->waitCount >= g->nThreads - 1) {
+        freeDead();
+        if (g->waitCount) naSemUp(g->sem, g->waitCount);
+        g->bottleneck = 0;
+    }
 }
 
 void naGC()
@@ -139,6 +210,29 @@ void naGC()
     UNLOCK();
     naCheckBottleneck();
 }
+int naGarbageCollect() // returns 1 when the bottleneck() path was taken, 0 when only dead blocks were freed
+{
+    int rv = 1;
+    LOCK();
+    //
+    // The number here is again based on observation - if this is too low then the inline GC will be used
+    // which is fine occasionally.
+    // So what we're doing by checking the global alloc is to see if GC is likely required during the next frame and if
+    // so we pre-empt this by doing it now.
+    // GC can typically take between 5ms and 50ms (F-15, FG1000 PFD & MFD, Advanced weather) - but usually it is completed
+    // prior to the start of the next frame.
+
+    globals->needGC = nasal_globals->allocCount < 23000; // NOTE(review): reads nasal_globals while the rest of this file uses globals - confirm both name the same object
+    if (globals->needGC)
+        bottleneck();
+    else {
+        bottleneckFreeDead();
+        rv = 0;
+    }
+    UNLOCK();
+    naCheckBottleneck();
+    return rv;
+}
 
 void naCheckBottleneck()
 {
@@ -219,7 +313,9 @@ static int poolsize(struct naPool* p)
     while(b) { total += b->size; b = b->next; }
     return total;
 }
-
+int GCglobalAlloc() { // exposes the current globals->allocCount
+    return globals->allocCount;
+}
 struct naObj** naGC_get(struct naPool* p, int n, int* nout)
 {
     struct naObj** result;
@@ -227,6 +323,7 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
     LOCK();
     while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) {
         globals->needGC = 1;
+        printf("++"); // printed each time an allocation forces an inline collection
         bottleneck();
     }
     if(p->nfree == 0)
@@ -239,51 +336,130 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
     UNLOCK();
     return result;
 }
+extern void na_t_stack_push(naRef v); // explicit mark stack, defined in cppbind/detail/to_nasal_helper.cxx
+extern int na_t_stack_count();
+extern naRef na_t_stack_pop();
 
-static void markvec(naRef r)
+static void oldmarkvec(naRef r) // vector helper for the recursive oldmark() path
 {
     int i;
     struct VecRec* vr = PTR(r).vec->rec;
-    if(!vr) return;
-    for(i=0; isize; i++)
+    if (!vr) return;
+    for (i = 0; isize; i++) // NOTE(review): the loop bound is garbled in this copy of the patch (probably "i<vr->size") - verify against the original commit
         mark(vr->array[i]);
 }
 
 // Sets the reference bit on the object, and recursively on all
 // objects reachable from it.  Uses the processor stack for recursion...
-static void mark(naRef r)
+static void oldmark(naRef r) // the previous recursive marker, kept for nasal_gc_old mode
 {
     int i;
 
-    if(IS_NUM(r) || IS_NIL(r))
+    if (IS_NUM(r) || IS_NIL(r))
         return;
 
-    if(PTR(r).obj->mark == 1)
+    if (PTR(r).obj->mark == 1)
         return;
 
     PTR(r).obj->mark = 1;
-    switch(PTR(r).obj->type) {
-    case T_VEC: markvec(r); break;
+    switch (PTR(r).obj->type) {
+    case T_VEC: oldmarkvec(r); break;
     case T_HASH: naiGCMarkHash(r); break;
     case T_CODE:
-        mark(PTR(r).code->srcFile);
-        for(i=0; i<PTR(r).code->nConstants; i++)
+        oldmark(PTR(r).code->srcFile);
+        for (i = 0; i<PTR(r).code->nConstants; i++)
             mark(PTR(r).code->constants[i]);
         break;
     case T_FUNC:
-        mark(PTR(r).func->code);
-        mark(PTR(r).func->namespace);
-        mark(PTR(r).func->next);
+        oldmark(PTR(r).func->code);
+        oldmark(PTR(r).func->namespace);
+        oldmark(PTR(r).func->next);
         break;
     case T_GHOST:
-        mark(PTR(r).ghost->data);
+        oldmark(PTR(r).ghost->data);
         break;
     }
 }
+void oldnaiGCMark(naRef r) // externally visible entry to the recursive marker
+{
+    oldmark(r);
+}
+
+static int do_mark(naRef r) // returns 1 when r needs no traversal (number, nil, already marked); otherwise marks it and returns 0
+{
+    if (IS_NUM(r) || IS_NIL(r))
+        return 1;
+
+    if (PTR(r).obj->mark == 1)
+        return 1;
+    PTR(r).obj->mark = 1;
+    return 0;
+}
+
+static void mark(naRef r) { // dispatches to the recursive or the explicit-stack marker
+    if (nasal_gc_old)
+        oldmark(r);
+    else
+        process_all(r, do_mark);
+}
+
+static void process_all(naRef r, int (*process)(naRef r)) // walks everything reachable from r using the shared explicit stack
+{
+    na_t_stack_push(r);
+    __elements_visited++;
+    while (na_t_stack_count() != 0)
+    {
+        naRef r = na_t_stack_pop();
+        if ((*process)(r)) // non-zero: nothing to traverse for this reference
+            continue;
+
+        switch (PTR(r).obj->type) {
+        case T_VEC: {
+            int i;
+            struct VecRec* vr = PTR(r).vec->rec;
+            if (vr) {
+                for (i = 0; i < vr->size; i++) {
+                    na_t_stack_push(vr->array[i]);
+                    __elements_visited++;
+                }
+            }
+            break;
+        }
+        case T_HASH: naiGCMarkHash(r); break; // hands each key and value to naiGCMark()
+        case T_CODE:
+        {
+            int i;
+            na_t_stack_push(PTR(r).code->srcFile);
+            for (i = 0; i < PTR(r).code->nConstants; i++) {
+                na_t_stack_push(PTR(r).code->constants[i]);
+                __elements_visited++;
+            }
+            break;
+        }
+        case T_FUNC:
+            __elements_visited++;
+            __elements_visited++;
+            __elements_visited++;
+            na_t_stack_push(PTR(r).func->code);
+            na_t_stack_push(PTR(r).func->namespace);
+            na_t_stack_push(PTR(r).func->next);
+            break;
+        case T_GHOST:
+            na_t_stack_push(PTR(r).ghost->data);
+            __elements_visited++;
+            break;
+        }
+    }
+}
 
 void naiGCMark(naRef r)
 {
-    mark(r);
+    if (nasal_gc_old) // was "if (oldmark)": a function name is always non-zero, so the else branch below could never run
+        oldnaiGCMark(r);
+    else { // explicit-stack mode: queue r for the loop in process_all()
+        na_t_stack_push(r);
+        __elements_visited++;
+    }
 }
 
 // Collects all the unreachable objects into a free list, and
@@ -304,9 +480,9 @@ static void reap(struct naPool* p)
     p->free = p->free0;
 
     for(b = p->blocks; b; b = b->next)
-        for(elem=0; elem < b->size; elem++) {
+        for (elem = 0; elem < b->size; elem++) {
             struct naObj* o = (struct naObj*)(b->block + elem * p->elemsz);
-            if(o->mark == 0)
+            if (o->mark == 0)
                 freeelem(p, o);
             o->mark = 0;
         }
@@ -318,11 +494,18 @@ static void reap(struct naPool* p)
 
     // Allocate more if necessary (try to keep 25-50% of the objects
     // available)
-    if(p->nfree < total/4) {
+    //if(p->nfree < total/4) {
+    //    int used = total - p->nfree;
+    //    int avail = total - used;
+    //    int need = used/2 - avail;
+    //    if(need > 0)
+    //        newBlock(p, need);
+    //}
+    if (p->nfree < total / 2) { // grow once fewer than half of the objects are free (was a quarter)
         int used = total - p->nfree;
         int avail = total - used;
-        int need = used/2 - avail;
-        if(need > 0)
+        int need = used / 1 - avail; // target as many free objects as used ones (was half as many)
+        if (need > 0)
             newBlock(p, need);
     }
 }
diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c
index 3679a0ca..dee061da 100644
--- a/simgear/nasal/hash.c
+++ b/simgear/nasal/hash.c
@@ -176,6 +176,17 @@ void naiGCMarkHash(naRef hash)
         }
 }
 
+void oldnaiGCMarkHash(naRef hash) // marks every key and value through the recursive oldnaiGCMark()
+{
+    int i;
+    HashRec* hr = REC(hash);
+    for (i = 0; hr && i < NCELLS(hr); i++)
+        if (TAB(hr)[i] >= 0) {
+            oldnaiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+            oldnaiGCMark(ENTS(hr)[TAB(hr)[i]].val);
+        }
+}
+
 static void tmpStr(naRef* out, struct naStr* str, const char* key)
 {
     str->type = T_STR;
diff --git a/simgear/nasal/misc.c b/simgear/nasal/misc.c
index 764d113d..d2790cab 100644
--- a/simgear/nasal/misc.c
+++ b/simgear/nasal/misc.c
@@ -65,7 +65,7 @@ naRef naStringValue(naContext c, naRef r)
 
 naRef naNew(struct Context* c,
int type) { - getSource(c); + //getSource(c); naRef result; if(c->nfree[type] == 0) c->free[type] = naGC_get(&globals->pools[type],