diff --git a/simgear/nasal/CMakeLists.txt b/simgear/nasal/CMakeLists.txt
index 8f4d2c90..e5881450 100644
--- a/simgear/nasal/CMakeLists.txt
+++ b/simgear/nasal/CMakeLists.txt
@@ -28,6 +28,7 @@ set(SOURCES
     code.h
     data.h
     parse.h
+    ThreadedGarbageCollector.cpp
     )
 
 simgear_component(nasal nasal "${SOURCES}" "${HEADERS}")
diff --git a/simgear/nasal/ThreadedGarbageCollector.cpp b/simgear/nasal/ThreadedGarbageCollector.cpp
new file mode 100644
index 00000000..22133b0c
--- /dev/null
+++ b/simgear/nasal/ThreadedGarbageCollector.cpp
@@ -0,0 +1,176 @@
+//#include "nasal.h"
+//#include "data.h"
+//#include "code.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+
+#include <simgear/debug/logstream.hxx>
+#include <simgear/threads/SGThread.hxx>
+#include <simgear/timing/timestamp.hxx>
+
+extern "C" {
+    extern int __bg_gc;
+    extern int GCglobalAlloc();
+    extern int naGarbageCollect();
+}
+
+// Worker thread that runs process() once each time the owning thread
+// calls release(); the owner can then block on awaitCompletion().
+class SGExclusiveThread : public SGThread
+{
+private:
+    std::mutex mutex_;
+    std::condition_variable condVar;
+    SGTimeStamp timestamp;
+    std::mutex Cmutex_;
+    std::condition_variable CcondVar;
+
+    bool _started;
+    bool _terminated;
+    int last_await_time;
+
+    std::atomic<bool> dataReady;
+    std::atomic<bool> complete;
+    std::atomic<bool> process_ran;
+    std::atomic<bool> process_running;
+
+public:
+    SGExclusiveThread() :
+        _started(false), _terminated(false), last_await_time(0),
+        dataReady(false), complete(true), process_ran(false), process_running(false)
+    {
+    }
+
+    virtual ~SGExclusiveThread()
+    {
+    }
+
+    // Caller side: signal the worker to run process() once.
+    void release() {
+        std::unique_lock<std::mutex> lck(mutex_);
+        if (!complete) {
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] not finished - skipping");
+            return;
+        }
+        if (!complete.exchange(false))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (2)");
+        if (dataReady.exchange(true))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (1)");
+        condVar.notify_one();
+    }
+
+    // Worker side: block until release() signals pending work.
+    void wait() {
+        std::unique_lock<std::mutex> lck(mutex_);
+        if (!dataReady)
+        {
+            do
+            {
+                condVar.wait(lck);
+            } while (!dataReady);
+        }
+    }
+    void clearAwaitCompletionTime() {
+        last_await_time = 0;
+    }
+    // Caller side: block until the worker has finished the current item.
+    virtual void awaitCompletion() {
+        timestamp.stamp();
+        std::unique_lock<std::mutex> lck(Cmutex_);
+        if (!complete)
+        {
+            do {
+                CcondVar.wait(lck);
+            } while (!complete.load());
+        }
+
+        if (process_ran) {
+            last_await_time = timestamp.elapsedUSec();
+            printf("await %5.1f ", last_await_time / 1000.0);
+            process_ran = false;
+        }
+    }
+
+    void setCompletion() {
+        std::unique_lock<std::mutex> lck(Cmutex_);
+        if (!dataReady.exchange(false))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on dataReady failed (5)\n");
+
+        if (complete.exchange(true))
+            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on complete failed (5)\n");
+        CcondVar.notify_one();
+    }
+
+    virtual int process() = 0;
+
+    virtual void run()
+    {
+        process_running = true;
+        while (!_terminated) {
+            wait();
+            process_ran = process();
+            setCompletion();
+        }
+        process_running = false;
+        _terminated = false;
+        _started = false;
+    }
+
+    void terminate() {
+        _terminated = true;
+    }
+    bool stop()
+    {
+        return true;
+    }
+    void ensure_running()
+    {
+        if (!_started)
+        {
+            _started = true;
+            start();
+        }
+    }
+    bool is_running()
+    {
+        return process_running;
+    }
+};
+
+class ThreadedGarbageCollector : public SGExclusiveThread
+{
+public:
+    ThreadedGarbageCollector() : SGExclusiveThread()
+    {
+    }
+    virtual ~ThreadedGarbageCollector()
+    {
+    }
+
+    virtual int process()
+    {
+        return naGarbageCollect();
+    }
+};
+
+ThreadedGarbageCollector gct;
+
+extern "C" {
+    void startNasalBackgroundGarbageCollection()
+    {
+        gct.ensure_running();
+    }
+    void stopNasalBackgroundGarbageCollection()
+    {
+        gct.terminate();
+    }
+    void performNasalBackgroundGarbageCollection()
+    {
+        if (gct.is_running())
+            gct.release();
+    }
+    void awaitNasalGarbageCollectionComplete(bool can_wait)
+    {
+        if (gct.is_running())
+        {
+            if (can_wait)
+                gct.awaitCompletion();
+            else
+                gct.clearAwaitCompletionTime();
+        }
+    }
+}
diff --git a/simgear/nasal/code.c b/simgear/nasal/code.c
index faf534ec..b7d3674b 100644
--- a/simgear/nasal/code.c
+++ b/simgear/nasal/code.c
@@ -24,6 +24,21 @@ struct Globals* globals = 0;
 
 static naRef bindFunction(naContext ctx, struct Frame* f, naRef code);
 
+//char __name[3000] = { 0 };
+//int init = 0;
+//void getSource(struct Context* c) {
+//    naRef v = naGetSourceFile(c, 0);
+//    init = 1;
+//    if (!IS_NIL(v))
+//        snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
+//    else
+//        *__name = 0;
+//}
+//char *getName() {
+//    if (init)
+//        return __name;
+//    return "**";
+//}
 #define ERR(c, msg) naRuntimeError((c),(msg))
 void naRuntimeError(naContext c, const char* fmt, ...)
 {
@@ -157,7 +172,7 @@ static void initContext(naContext c)
     c->error[0] = 0;
     c->userData = 0;
 }
-
+#define BASE_SIZE 256000
 static void initGlobals()
 {
     int i;
@@ -168,10 +183,10 @@ static void initGlobals()
     globals->sem = naNewSem();
     globals->lock = naNewLock();
 
-    globals->allocCount = 256; // reasonable starting value
+    globals->allocCount = BASE_SIZE; // reasonable starting value
     for(i=0; i<NUM_NASAL_TYPES; i++)
         naGC_init(&(globals->pools[i]), i);
-    globals->deadsz = 256;
+    globals->deadsz = BASE_SIZE;
     globals->ndead = 0;
     globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
 
@@ -305,6 +320,7 @@ static void checkNamedArgs(naContext ctx, struct naCode* c, struct naHash* h)
 
 static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named)
 {
+    //getSource(ctx);
     naRef *args, func, code, obj = naNil();
     struct Frame* f;
     int opf = ctx->opTop - nargs;
@@ -335,8 +351,9 @@ static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named
     f->ip = 0;
     f->bp = ctx->opFrame;
 
-    if(mcall) naHash_set(f->locals, globals->meRef, obj);
-
+    if (mcall) {
+        naHash_set(f->locals, globals->meRef, obj);
+    }
     if(named) checkNamedArgs(ctx, PTR(code).code, PTR(f->locals).hash);
     else      setupArgs(ctx, f, args, nargs);
 
@@ -833,9 +850,13 @@ naRef naGetSourceFile(naContext ctx, int frame)
 {
     naRef f;
     frame = findFrame(ctx, &ctx, frame);
-    f = ctx->fStack[frame].func;
-    f = PTR(f).func->code;
-    return PTR(f).code->srcFile;
+    if (frame >= 0) {
+        f = ctx->fStack[frame].func;
+        f = PTR(f).func->code;
+        if (!IS_NIL(f) && PTR(f).code)
+            return PTR(f).code->srcFile;
+    }
+    return naNil();
 }
 
 char* naGetError(naContext ctx)
@@ -901,8 +922,9 @@ naRef naCall(naContext ctx, naRef func, int argc, naRef* args,
         func = naNewFunc(ctx, func);
         PTR(func).func->namespace = locals;
     }
-    if(!IS_NIL(obj))
+    if (!IS_NIL(obj)) {
         naHash_set(locals, globals->meRef, obj);
+    }
     ctx->opTop = ctx->markTop = 0;
     ctx->fTop = 1;
 
diff --git a/simgear/nasal/gc.c b/simgear/nasal/gc.c
index 5ac9c43c..d62ceab4 100644
--- a/simgear/nasal/gc.c
+++ b/simgear/nasal/gc.c
@@ -1,25 +1,30 @@
 #include "nasal.h"
 #include "data.h"
 #include "code.h"
-
 #define MIN_BLOCK_SIZE 32
 
 static void reap(struct naPool* p);
 static void mark(naRef r);
+static void process_all(naRef r, int(*process)(naRef r));
+
 struct Block {
     int   size;
     char* block;
     struct Block* next;
 };
-
 // Must be called with the giant exclusive lock!
-static void freeDead()
+extern void global_stamp();
+extern int global_elapsedUSec();
+int nasal_gc_old = 0;
+
+static int freeDead()
 {
     int i;
     for(i=0; i<globals->ndead; i++)
         naFree(globals->deadBlocks[i]);
     globals->ndead = 0;
+    return i;
 }
 
 static void marktemps(struct Context* c)
@@ -32,49 +37,105 @@ static void marktemps(struct Context* c)
     }
 }
 
+int __elements_visited = 0;
+extern int __stack_hwm;
+int busy = 0;
 // Must be called with the big lock!
 static void garbageCollect()
 {
+    if (busy)
+        return;
+    busy = 1;
     int i;
     struct Context* c;
     globals->allocCount = 0;
-    c = globals->allContexts;
-    while(c) {
-        for(i=0; i<NUM_NASAL_TYPES; i++)
+
+    // timing / visit-count instrumentation for the debug printfs below
+    int st = 0, et = 0;
+    int stel = 0, eel = 0;
+    int ctxc = 0;
+    __elements_visited = 0;
+
+    c = globals->allContexts;
+    while (c) {
+        ctxc++;
+        for (i = 0; i < NUM_NASAL_TYPES; i++)
             c->nfree[i] = 0;
-        for(i=0; i < c->fTop; i++) {
+        for (i = 0; i < c->fTop; i++) {
             mark(c->fStack[i].func);
             mark(c->fStack[i].locals);
         }
-        for(i=0; i < c->opTop; i++)
+        for (i = 0; i < c->opTop; i++)
             mark(c->opStack[i]);
 
         mark(c->dieArg);
        marktemps(c);
         c = c->nextAll;
     }
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("--> garbageCollect(#e%-5d): %-4d ", eel, et);
 
     mark(globals->save);
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("s(%5d) %-5d ", eel, et);
+
+    mark(globals->save_hash);
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    printf("h(%5d) %-5d ", eel, et);
+
     mark(globals->symbols);
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("sy(%5d) %-4d ", eel, et);
+
     mark(globals->meRef);
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("me(%5d) %-5d ", eel, et);
+
     mark(globals->argRef);
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf("ar(%5d) %-5d ", eel, et);
+
     mark(globals->parentsRef);
-
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    eel = __elements_visited - stel; stel = __elements_visited;
+    //printf(" ev[%3d] %-5d", eel, et);
     // Finally collect all the freed objects
-    for(i=0; i<NUM_NASAL_TYPES; i++)
+    for(i=0; i<NUM_NASAL_TYPES; i++) {
         reap(&(globals->pools[i]));
-
+    }
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    printf(" >> reap %-5d", et);
     // Make enough space for the dead blocks we need to free during
     // execution.  This works out to 1 spot for every 2 live objects,
     // which should limit the number of bottleneck operations
     // without imposing an undue burden of extra "freeable" memory.
     if(globals->deadsz < globals->allocCount) {
         globals->deadsz = globals->allocCount;
-        if(globals->deadsz < 256) globals->deadsz = 256;
+        if(globals->deadsz < 256000) globals->deadsz = 256000;
         naFree(globals->deadBlocks);
         globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
     }
     globals->needGC = 0;
+    et = global_elapsedUSec() - st; st = global_elapsedUSec();
+    printf(">> %-5d ", et);
+    busy = 0;
 }
 
 void naModLock()
@@ -104,6 +165,7 @@ void naModUnlock()
 // you think about it).
 static void bottleneck()
 {
+    global_stamp();
     struct Globals* g = globals;
     g->bottleneck = 1;
     while(g->bottleneck && g->waitCount < g->nThreads - 1) {
         g->waitCount++;
         UNLOCK(); naSemDown(g->sem); LOCK();
         g->waitCount--;
     }
+    printf("GC: wait %2d ", global_elapsedUSec());
     if(g->waitCount >= g->nThreads - 1) {
-        freeDead();
-        if(g->needGC) garbageCollect();
+        int fd = freeDead();
+        printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec());
+        if(g->needGC)
+            garbageCollect();
         if(g->waitCount) naSemUp(g->sem, g->waitCount);
         g->bottleneck = 0;
     }
+    printf(" :: finished: %5d\n", global_elapsedUSec());
+}
+
+// Variant of bottleneck() that only frees the dead blocks and never
+// runs a full collection; used when no GC is needed yet.
+static void bottleneckFreeDead()
+{
+    global_stamp();
+    struct Globals* g = globals;
+    g->bottleneck = 1;
+    while (g->bottleneck && g->waitCount < g->nThreads - 1) {
+        g->waitCount++;
+        UNLOCK(); naSemDown(g->sem); LOCK();
+        g->waitCount--;
+    }
+    if (g->waitCount >= g->nThreads - 1) {
+        freeDead();
+        if (g->waitCount) naSemUp(g->sem, g->waitCount);
+        g->bottleneck = 0;
+    }
 }
 
 void naGC()
@@ -127,6 +210,29 @@ void naGC()
     UNLOCK();
     naCheckBottleneck();
 }
+
+// Entry point for the background GC thread.  Returns 1 if a full
+// collection ran, 0 if only the dead blocks were freed.
+int naGarbageCollect()
+{
+    int rv = 1;
+    LOCK();
+    //
+    // The threshold here is again based on observation - if it is too low
+    // then the inline GC will be used instead, which is fine occasionally.
+    // By checking the global allocation count we estimate whether a GC is
+    // likely to be required during the next frame, and if so we pre-empt
+    // it by collecting now.
+    // GC can typically take between 5ms and 50ms (F-15, FG1000 PFD & MFD,
+    // Advanced weather) - but usually it completes before the start of
+    // the next frame.
+    globals->needGC = globals->allocCount < 23000;
+    if (globals->needGC)
+        bottleneck();
+    else {
+        bottleneckFreeDead();
+        rv = 0;
+    }
+    UNLOCK();
+    naCheckBottleneck();
+    return rv;
+}
 
 void naCheckBottleneck()
 {
@@ -207,7 +313,9 @@ static int poolsize(struct naPool* p)
     while(b) { total += b->size; b = b->next; }
     return total;
 }
-
+int GCglobalAlloc() {
+    return globals->allocCount;
+}
 struct naObj** naGC_get(struct naPool* p, int n, int* nout)
 {
     struct naObj** result;
@@ -215,6 +323,7 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
     LOCK();
     while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) {
         globals->needGC = 1;
+        printf("++");
         bottleneck();
     }
     if(p->nfree == 0)
@@ -227,51 +336,130 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
     UNLOCK();
     return result;
 }
+
+extern void na_t_stack_push(naRef v);
+extern int na_t_stack_count();
+extern naRef na_t_stack_pop();
 
-static void markvec(naRef r)
+static void oldmarkvec(naRef r)
 {
     int i;
     struct VecRec* vr = PTR(r).vec->rec;
-    if(!vr) return;
-    for(i=0; i<vr->size; i++)
+    if (!vr) return;
+    for (i = 0; i < vr->size; i++)
         mark(vr->array[i]);
 }
 
 // Sets the reference bit on the object, and recursively on all
 // objects reachable from it.  Uses the processor stack for recursion...
-static void mark(naRef r)
+extern void oldnaiGCMarkHash(naRef hash);
+
+static void oldmark(naRef r)
 {
     int i;
-    if(IS_NUM(r) || IS_NIL(r))
+    if (IS_NUM(r) || IS_NIL(r))
         return;
-    if(PTR(r).obj->mark == 1)
+    if (PTR(r).obj->mark == 1)
         return;
 
     PTR(r).obj->mark = 1;
-    switch(PTR(r).obj->type) {
-    case T_VEC: markvec(r); break;
-    case T_HASH: naiGCMarkHash(r); break;
+    switch (PTR(r).obj->type) {
+    case T_VEC: oldmarkvec(r); break;
+    // use the recursive hash marker so the old path stays fully recursive
+    case T_HASH: oldnaiGCMarkHash(r); break;
     case T_CODE:
-        mark(PTR(r).code->srcFile);
-        for(i=0; i<PTR(r).code->nConstants; i++)
-            mark(PTR(r).code->constants[i]);
+        oldmark(PTR(r).code->srcFile);
+        for (i = 0; i < PTR(r).code->nConstants; i++)
+            oldmark(PTR(r).code->constants[i]);
         break;
     case T_FUNC:
-        mark(PTR(r).func->code);
-        mark(PTR(r).func->namespace);
-        mark(PTR(r).func->next);
+        oldmark(PTR(r).func->code);
+        oldmark(PTR(r).func->namespace);
+        oldmark(PTR(r).func->next);
         break;
     case T_GHOST:
-        mark(PTR(r).ghost->data);
+        oldmark(PTR(r).ghost->data);
         break;
     }
 }
+
+void oldnaiGCMark(naRef r)
+{
+    oldmark(r);
+}
+
+// Marks a single object; returns 1 if its children do not need to be
+// visited (number, nil, or already marked).
+static int do_mark(naRef r)
+{
+    if (IS_NUM(r) || IS_NIL(r))
+        return 1;
+
+    if (PTR(r).obj->mark == 1)
+        return 1;
+    PTR(r).obj->mark = 1;
+    return 0;
+}
+
+static void mark(naRef r) {
+    if (nasal_gc_old)
+        oldmark(r);
+    else
+        process_all(r, do_mark);
+}
+
+// Iterative replacement for the recursive mark(): pending references
+// are kept on an explicit stack instead of the processor stack.
+static void process_all(naRef r, int (*process)(naRef r))
+{
+    na_t_stack_push(r);
+    __elements_visited++;
+    while (na_t_stack_count() != 0)
+    {
+        naRef r = na_t_stack_pop();
+        if ((*process)(r))
+            continue;
+
+        switch (PTR(r).obj->type) {
+        case T_VEC: {
+            int i;
+            struct VecRec* vr = PTR(r).vec->rec;
+            if (vr) {
+                for (i = 0; i < vr->size; i++) {
+                    na_t_stack_push(vr->array[i]);
+                    __elements_visited++;
+                }
+            }
+            break;
+        }
+        case T_HASH: naiGCMarkHash(r); break;
+        case T_CODE:
+        {
+            int i;
+            na_t_stack_push(PTR(r).code->srcFile);
+            for (i = 0; i < PTR(r).code->nConstants; i++) {
+                na_t_stack_push(PTR(r).code->constants[i]);
+                __elements_visited++;
+            }
+            break;
+        }
+        case T_FUNC:
+            __elements_visited += 3;
+            na_t_stack_push(PTR(r).func->code);
+            na_t_stack_push(PTR(r).func->namespace);
+            na_t_stack_push(PTR(r).func->next);
+            break;
+        case T_GHOST:
+            na_t_stack_push(PTR(r).ghost->data);
+            __elements_visited++;
+            break;
+        }
+    }
+}
 
 void naiGCMark(naRef r)
 {
-    mark(r);
+    if (nasal_gc_old)
+        oldnaiGCMark(r);
+    else {
+        na_t_stack_push(r);
+        __elements_visited++;
+    }
 }
 
 // Collects all the unreachable objects into a free list, and
@@ -292,9 +480,9 @@ static void reap(struct naPool* p)
     p->free = p->free0;
 
     for(b = p->blocks; b; b = b->next)
-        for(elem=0; elem < b->size; elem++) {
+        for (elem = 0; elem < b->size; elem++) {
             struct naObj* o = (struct naObj*)(b->block + elem * p->elemsz);
-            if(o->mark == 0)
+            if (o->mark == 0)
                 freeelem(p, o);
             o->mark = 0;
         }
@@ -306,11 +494,18 @@ static void reap(struct naPool* p)
 
     // Allocate more if necessary (try to keep 25-50% of the objects
     // available)
-    if(p->nfree < total/4) {
+    //if(p->nfree < total/4) {
+    //    int used = total - p->nfree;
+    //    int avail = total - used;
+    //    int need = used/2 - avail;
+    //    if(need > 0)
+    //        newBlock(p, need);
+    //}
+    // Keep at least 50% of the objects available instead.
+    if (p->nfree < total / 2) {
         int used = total - p->nfree;
         int avail = total - used;
-        int need = used/2 - avail;
-        if(need > 0)
+        int need = used - avail;
+        if (need > 0)
             newBlock(p, need);
     }
 }
diff --git a/simgear/nasal/hash.c b/simgear/nasal/hash.c
index 3679a0ca..dee061da 100644
--- a/simgear/nasal/hash.c
+++ b/simgear/nasal/hash.c
@@ -176,6 +176,17 @@ void naiGCMarkHash(naRef hash)
     }
 }
 
+extern void oldnaiGCMark(naRef r);
+
+void oldnaiGCMarkHash(naRef hash)
+{
+    int i;
+    HashRec* hr = REC(hash);
+    for (i = 0; hr && i < NCELLS(hr); i++)
+        if (TAB(hr)[i] >= 0) {
+            oldnaiGCMark(ENTS(hr)[TAB(hr)[i]].key);
+            oldnaiGCMark(ENTS(hr)[TAB(hr)[i]].val);
+        }
+}
+
 static void tmpStr(naRef* out, struct naStr* str, const char* key)
 {
     str->type = T_STR;
diff --git a/simgear/nasal/lib.c b/simgear/nasal/lib.c
index 528803b5..87e6bd6b 100644
--- a/simgear/nasal/lib.c
+++ b/simgear/nasal/lib.c
@@ -295,28 +295,31 @@ static naRef f_die(naContext c, naRef me, int argc, naRef* args)
     return naNil(); // never executes
 }
 
-// Wrapper around vsnprintf, iteratively increasing the buffer size
-// until it fits.  Returned buffer should be freed by the caller.
+// Wrapper around vsnprintf that allocates exactly the required size:
+// a first vsnprintf call with NULL/0 measures the number of characters
+// needed, which we then allocate and format into.
+// Returned buffer should be freed by the caller.
 static char* dosprintf(char* f, ...)
 {
     char* buf;
     va_list va;
-    int olen, len = 16;
+    int len = 0;
     va_start(va, f);
-    while(1) {
-        buf = naAlloc(len);
-        va_list vaCopy;
-        va_copy(vaCopy, va);
-        olen = vsnprintf(buf, len, f, vaCopy);
-        if(olen >= 0 && olen < len) {
-            va_end(va);
-            va_end(vaCopy);
-            return buf;
-        }
-        va_end(vaCopy);
-        naFree(buf);
-        len *= 2;
+    va_list vaCopy;
+    va_copy(vaCopy, va);
+    len = vsnprintf(0, 0, f, vaCopy);
+    va_end(vaCopy);
+    if (len <= 0) {
+        buf = naAlloc(2);
+        *buf = 0;
     }
+    else {
+        len++; // allow for the terminating null
+        buf = naAlloc(len);
+        // vaCopy was consumed by the measuring call above, so the
+        // second vsnprintf must use the original va_list
+        len = vsnprintf(buf, len, f, va);
+    }
+    va_end(va);
+    return buf;
 }
 
 // Inspects a printf format string f, and finds the next "%..." format
diff --git a/simgear/nasal/misc.c b/simgear/nasal/misc.c
index 0a5c8615..d2790cab 100644
--- a/simgear/nasal/misc.c
+++ b/simgear/nasal/misc.c
@@ -65,6 +65,7 @@ naRef naStringValue(naContext c, naRef r)
 
 naRef naNew(struct Context* c, int type)
 {
+    //getSource(c);
     naRef result;
     if(c->nfree[type] == 0)
         c->free[type] = naGC_get(&globals->pools[type],
diff --git a/simgear/scene/tgdb/TreeBin.cxx b/simgear/scene/tgdb/TreeBin.cxx
index 7befc6a5..2d755750 100644
--- a/simgear/scene/tgdb/TreeBin.cxx
+++ b/simgear/scene/tgdb/TreeBin.cxx
@@ -65,6 +65,7 @@ namespace simgear
 
 bool use_tree_shadows;
 bool use_tree_normals;
+//OpenThreads::ReentrantMutex treeAddMutex;
 
 // Tree instance scheme:
 // vertex - local position of quad vertex.
@@ -325,6 +326,15 @@ struct QuadTreeCleaner : public osg::NodeVisitor
     QuadTreeCleaner() : NodeVisitor(NodeVisitor::TRAVERSE_ALL_CHILDREN)
     {
     }
+    // Children are collected during the traversal and only removed here,
+    // so the scene graph is not modified while it is being visited.
+    virtual ~QuadTreeCleaner() {
+        for (auto const& x : cleanupList) {
+            x.second->removeChild(x.first);
+        }
+    }
+    std::map<osg::Node*, osg::Group*> cleanupList;
+    // lod.removeChildren(i, 1);
     void apply(LOD& lod)
     {
         for (int i = lod.getNumChildren() - 1; i >= 0; --i) {
@@ -347,7 +357,7 @@ struct QuadTreeCleaner : public osg::NodeVisitor
             }
         }
         if (geodeEmpty)
-            lod.removeChildren(i, 1);
+            cleanupList[lod.getChild(i)] = &lod;
         }
     }
 };
@@ -359,6 +369,7 @@ struct QuadTreeCleaner : public osg::NodeVisitor
 osg::Group* createForest(SGTreeBinList& forestList, const osg::Matrix& transform,
                          const SGReaderWriterOptions* options)
 {
+//    OpenThreads::ScopedLock<OpenThreads::ReentrantMutex> lock(treeAddMutex);
     Matrix transInv = Matrix::inverse(transform);
     static Matrix ident;
     // Set up some shared structures.
diff --git a/simgear/timing/timestamp.cxx b/simgear/timing/timestamp.cxx
index fcb56836..03d20d59 100644
--- a/simgear/timing/timestamp.cxx
+++ b/simgear/timing/timestamp.cxx
@@ -345,3 +345,13 @@ int SGTimeStamp::elapsedUSec() const
     return static_cast<int>((now - *this).toUSecs());
 }
 
+extern "C" {
+    SGTimeStamp global_timestamp;
+    // stamp()/elapsed pair used by the GC instrumentation in gc.c
+    void global_stamp() {
+        global_timestamp.stamp();
+    }
+    int global_elapsedUSec()
+    {
+        return global_timestamp.elapsedUSec();
+    }
+}
\ No newline at end of file
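
Usage sketch (illustrative, not part of the patch): the new extern "C" entry
points are intended to be driven once per frame by the host - release the
collector after the scripting part of the frame so the GC overlaps the
non-Nasal work, then synchronise again before Nasal next runs. The frame loop
and the have_time_to_wait() policy helper below are hypothetical; only the
four entry points come from this patch.

    // sketch of a host frame loop (C++), hypothetical apart from the
    // four declarations, which match ThreadedGarbageCollector.cpp
    extern "C" {
        void startNasalBackgroundGarbageCollection();
        void stopNasalBackgroundGarbageCollection();
        void performNasalBackgroundGarbageCollection();
        void awaitNasalGarbageCollectionComplete(bool can_wait);
    }

    static bool have_time_to_wait() { return true; } // hypothetical policy

    void run_frames()
    {
        startNasalBackgroundGarbageCollection();   // spawn the GC thread once
        for (;;) {
            // ... run the simulation and Nasal scripts for this frame ...

            // kick a collection so it overlaps the non-Nasal part of the frame
            performNasalBackgroundGarbageCollection();

            // ... rendering and I/O - anything that does not touch Nasal ...

            // before Nasal runs again: block until the collector is done if
            // we can afford to, otherwise just clear the await timer
            awaitNasalGarbageCollectionComplete(have_time_to_wait());
        }
        // stopNasalBackgroundGarbageCollection() on shutdown
    }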