Nasal GC background threaded merge.

Merge branch 'nasal-background-thredead-gc'
This commit is contained in:
Richard Harrison 2019-06-08 10:07:48 +02:00
commit cba902f22b
9 changed files with 490 additions and 60 deletions

View File

@ -28,6 +28,7 @@ set(SOURCES
code.h
data.h
parse.h
ThreadedGarbageCollector.cpp
)
simgear_component(nasal nasal "${SOURCES}" "${HEADERS}")

View File

@ -0,0 +1,176 @@
//#include "nasal.h"
//#include "data.h"
//#include "code.h"
#include <simgear/timing/timestamp.hxx>
#include <simgear/threads/SGThread.hxx>
#include <simgear/debug/logstream.hxx>
#include <mutex>
#include <condition_variable>
#include <atomic>
extern "C" {
extern int __bg_gc;
extern int GCglobalAlloc();
extern int naGarbageCollect();
}
/**
 * Worker thread that executes process() once for every accepted release().
 *
 * Hand-off protocol:
 *  - release()         : caller side; flags a request (dataReady) and wakes the worker.
 *  - wait()            : worker side; sleeps until a request is flagged or terminate() is called.
 *  - setCompletion()   : worker side; clears the request and flags completion.
 *  - awaitCompletion() : caller side; blocks until the current request has completed.
 *
 * mutex_/condVar carry the "work available" signal and Cmutex_/CcondVar the
 * "work finished" signal.
 */
class SGExclusiveThread : public SGThread
{
private:
    std::mutex mutex_;
    std::condition_variable condVar;
    SGTimeStamp timestamp;
    std::mutex Cmutex_;
    std::condition_variable CcondVar;
    // Both flags are written by the controlling thread and read by the worker
    // (and vice versa at the end of run()), so they have to be atomic.
    std::atomic<bool> _started;
    std::atomic<bool> _terminated;
    int last_await_time;
    std::atomic<bool> dataReady;
    std::atomic<bool> complete;
    std::atomic<bool> process_ran;
    std::atomic<bool> process_running;

public:
    SGExclusiveThread() :
        _started(false), _terminated(false), last_await_time(0),
        dataReady(false), complete(true), process_ran(false), process_running(false)
    {
    }

    virtual ~SGExclusiveThread()
    {
    }

    /// Requests one process() run. If the previous request has not completed
    /// yet the call is logged and ignored.
    void release() {
        std::unique_lock<std::mutex> lck(mutex_);
        if (!complete) {
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] not finished - skipping");
            return;
        }
        if (!complete.exchange(false))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (2)");
        if (dataReady.exchange(true))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (1)");
        condVar.notify_one();
    }

    /// Blocks the worker until a request is pending or termination has been
    /// requested. The caller must inspect dataReady to tell the two apart.
    void wait() {
        std::unique_lock<std::mutex> lck(mutex_);
        condVar.wait(lck, [this] { return dataReady.load() || _terminated.load(); });
    }

    void clearAwaitCompletionTime() {
        last_await_time = 0;
    }

    /// Blocks until the outstanding request (if any) has completed. When the
    /// last process() call reported work (non-zero), the time spent waiting is
    /// stored in last_await_time (microseconds) and printed in milliseconds.
    virtual void awaitCompletion() {
        timestamp.stamp();
        std::unique_lock<std::mutex> lck(Cmutex_);
        CcondVar.wait(lck, [this] { return complete.load(); });

        if (process_ran) {
            last_await_time = timestamp.elapsedUSec();
            printf("await %5.1f ", last_await_time / 1000.0);
            process_ran = false;
        }
    }

    /// Worker side: marks the current request as finished and wakes a thread
    /// blocked in awaitCompletion().
    void setCompletion() {
        std::unique_lock<std::mutex> lck(Cmutex_);
        if (!dataReady.exchange(false))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on dataReady failed (5)\n");
        if (complete.exchange(true))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on complete failed (5)\n");
        CcondVar.notify_one();
    }

    /// Unit of work supplied by the subclass; the result is stored as a
    /// boolean "something was done" flag (non-zero means true).
    virtual int process() = 0;

    /// Thread body: serve requests until terminate() is called.
    virtual void run()
    {
        process_running = true;
        while (!_terminated) {
            wait();
            // Woken by terminate() without a pending request: nothing to do.
            // A request that is already pending is still served so that a
            // thread blocked in awaitCompletion() is always released.
            if (!dataReady)
                break;
            process_ran = (process() != 0);
            setCompletion();
        }
        process_running = false;
        _terminated = false;
        _started = false;
    }

    /// Asks the worker to leave run(). The flag is set under mutex_ and the
    /// condition variable is signalled so that a worker sleeping in wait()
    /// actually wakes up and observes it.
    void terminate() {
        {
            std::lock_guard<std::mutex> lck(mutex_);
            _terminated = true;
        }
        condVar.notify_one();
    }

    bool stop()
    {
        return true;
    }

    /// Starts the thread unless it has already been started.
    void ensure_running()
    {
        if (!_started.exchange(true))
            start();
    }

    /// True while the worker is inside run().
    bool is_running()
    {
        return process_running;
    }
};
class ThreadedGarbageCollector : public SGExclusiveThread
{
public:
ThreadedGarbageCollector() : SGExclusiveThread()
{
}
virtual ~ThreadedGarbageCollector()
{
}
virtual int process()
{
return naGarbageCollect();
}
};
// The one collector thread object driven by the C entry points below.
ThreadedGarbageCollector gct;

extern "C" {

/// Starts the collector thread if it has not been started already.
void startNasalBackgroundGarbageCollection()
{
    gct.ensure_running();
}

/// Asks the collector thread to leave its run loop.
void stopNasalBackgroundGarbageCollection()
{
    gct.terminate();
}

/// Requests one background collection pass; does nothing when the thread is
/// not running.
void performNasalBackgroundGarbageCollection()
{
    if (!gct.is_running())
        return;
    gct.release();
}

/// When the thread is running: either blocks until the requested pass has
/// completed (can_wait) or just resets the recorded wait time (!can_wait).
void awaitNasalGarbageCollectionComplete(bool can_wait)
{
    if (!gct.is_running())
        return;

    if (!can_wait) {
        gct.clearAwaitCompletionTime();
        return;
    }
    gct.awaitCompletion();
}

}

View File

@ -24,6 +24,21 @@ struct Globals* globals = 0;
static naRef bindFunction(naContext ctx, struct Frame* f, naRef code);
//char __name[3000] = { 0 };
//int init = 0;
//void getSource(struct Context* c) {
// naRef v = naGetSourceFile(c, 0);
// init = 1;
// if (!IS_NIL(v))
// snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
// else
// *__name = 0;
//}
//char *getName() {
// if (init)
// return __name;
// return "**";
//}
#define ERR(c, msg) naRuntimeError((c),(msg))
void naRuntimeError(naContext c, const char* fmt, ...)
{
@ -157,7 +172,7 @@ static void initContext(naContext c)
c->error[0] = 0;
c->userData = 0;
}
#define BASE_SIZE 256000
static void initGlobals()
{
int i;
@ -168,10 +183,10 @@ static void initGlobals()
globals->sem = naNewSem();
globals->lock = naNewLock();
globals->allocCount = 256; // reasonable starting value
globals->allocCount = BASE_SIZE; // reasonable starting value
for(i=0; i<NUM_NASAL_TYPES; i++)
naGC_init(&(globals->pools[i]), i);
globals->deadsz = 256;
globals->deadsz = BASE_SIZE;
globals->ndead = 0;
globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
@ -305,6 +320,7 @@ static void checkNamedArgs(naContext ctx, struct naCode* c, struct naHash* h)
static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named)
{
//getSource(ctx);
naRef *args, func, code, obj = naNil();
struct Frame* f;
int opf = ctx->opTop - nargs;
@ -335,8 +351,9 @@ static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named
f->ip = 0;
f->bp = ctx->opFrame;
if(mcall) naHash_set(f->locals, globals->meRef, obj);
if (mcall) {
naHash_set(f->locals, globals->meRef, obj);
}
if(named) checkNamedArgs(ctx, PTR(code).code, PTR(f->locals).hash);
else setupArgs(ctx, f, args, nargs);
@ -833,9 +850,13 @@ naRef naGetSourceFile(naContext ctx, int frame)
{
naRef f;
frame = findFrame(ctx, &ctx, frame);
f = ctx->fStack[frame].func;
f = PTR(f).func->code;
return PTR(f).code->srcFile;
if (frame >= 0) {
f = ctx->fStack[frame].func;
f = PTR(f).func->code;
if (!IS_NIL(f) && PTR(f).code)
return PTR(f).code->srcFile;
}
return naNil();
}
char* naGetError(naContext ctx)
@ -901,8 +922,9 @@ naRef naCall(naContext ctx, naRef func, int argc, naRef* args,
func = naNewFunc(ctx, func);
PTR(func).func->namespace = locals;
}
if(!IS_NIL(obj))
if (!IS_NIL(obj)) {
naHash_set(locals, globals->meRef, obj);
}
ctx->opTop = ctx->markTop = 0;
ctx->fTop = 1;

View File

@ -1,25 +1,30 @@
#include "nasal.h"
#include "data.h"
#include "code.h"
#define MIN_BLOCK_SIZE 32
static void reap(struct naPool* p);
static void mark(naRef r);
static void process_all(naRef r, int(*process)(naRef r));
struct Block {
int size;
char* block;
struct Block* next;
};
// Must be called with the giant exclusive lock!
static void freeDead()
extern void global_stamp();
extern int global_elapsedUSec();
int nasal_gc_old = 0;
static int freeDead()
{
int i;
for(i=0; i<globals->ndead; i++)
naFree(globals->deadBlocks[i]);
globals->ndead = 0;
return i;
}
static void marktemps(struct Context* c)
@ -32,49 +37,105 @@ static void marktemps(struct Context* c)
}
}
int __elements_visited = 0;
extern int __stack_hwm;
int busy=0;
// Must be called with the big lock!
static void garbageCollect()
{
if (busy)
return;
busy = 1;
int i;
struct Context* c;
globals->allocCount = 0;
c = globals->allContexts;
while(c) {
for(i=0; i<NUM_NASAL_TYPES; i++)
int ctxc = 0;
__elements_visited = 0;
__stack_hwm = 0;
int st = global_elapsedUSec();
int et = 0;
int stel = __elements_visited;
int eel = 0;
c = globals->allContexts;
while (c) {
ctxc++;
for (i = 0; i < NUM_NASAL_TYPES; i++)
c->nfree[i] = 0;
for(i=0; i < c->fTop; i++) {
for (i = 0; i < c->fTop; i++) {
mark(c->fStack[i].func);
mark(c->fStack[i].locals);
}
for(i=0; i < c->opTop; i++)
for (i = 0; i < c->opTop; i++)
mark(c->opStack[i]);
mark(c->dieArg);
marktemps(c);
c = c->nextAll;
}
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("--> garbageCollect(#e%-5d): %-4d ", eel, et);
mark(globals->save);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("s(%5d) %-5d ", eel, et);
mark(globals->save_hash);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("h(%5d) %-5d ", eel, et);
mark(globals->symbols);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("sy(%5d) %-4d ", eel, et);
mark(globals->meRef);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("me(%5d) %-5d ", eel, et);
mark(globals->argRef);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("ar(%5d) %-5d ", eel, et);
mark(globals->parentsRef);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf(" ev[%3d] %-5d", eel, et);
// Finally collect all the freed objects
for(i=0; i<NUM_NASAL_TYPES; i++)
for (i = 0; i < NUM_NASAL_TYPES; i++) {
reap(&(globals->pools[i]));
}
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
printf(" >> reap %-5d", et);
// Make enough space for the dead blocks we need to free during
// execution. This works out to 1 spot for every 2 live objects,
// which should limit the number of bottleneck operations
// without imposing an undue burden of extra "freeable" memory.
if(globals->deadsz < globals->allocCount) {
globals->deadsz = globals->allocCount;
if(globals->deadsz < 256) globals->deadsz = 256;
if(globals->deadsz < 256000) globals->deadsz = 256000;
naFree(globals->deadBlocks);
globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
}
globals->needGC = 0;
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
printf(">> %-5d ", et);
busy = 0;
}
void naModLock()
@ -104,6 +165,7 @@ void naModUnlock()
// you think about it).
static void bottleneck()
{
global_stamp();
struct Globals* g = globals;
g->bottleneck = 1;
while(g->bottleneck && g->waitCount < g->nThreads - 1) {
@ -111,12 +173,33 @@ static void bottleneck()
UNLOCK(); naSemDown(g->sem); LOCK();
g->waitCount--;
}
printf("GC: wait %2d ", global_elapsedUSec());
if(g->waitCount >= g->nThreads - 1) {
freeDead();
if(g->needGC) garbageCollect();
int fd = freeDead();
printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec());
if(g->needGC)
garbageCollect();
if(g->waitCount) naSemUp(g->sem, g->waitCount);
g->bottleneck = 0;
}
printf(" :: finished: %5d\n", global_elapsedUSec());
}
// Variant of bottleneck() that only releases the queued dead blocks via
// freeDead(); it never calls garbageCollect().
// The only caller in view, naGarbageCollect(), invokes it between LOCK() and
// UNLOCK(); the wait loop below temporarily drops that lock itself.
static void bottleneckFreeDead()
{
// Restarts the shared timestamp; nothing in this function reads it back.
global_stamp();
struct Globals* g = globals;
g->bottleneck = 1;
// Wait on the semaphore until either the bottleneck flag has been cleared
// or waitCount has reached nThreads - 1.
while (g->bottleneck && g->waitCount < g->nThreads - 1) {
g->waitCount++;
UNLOCK(); naSemDown(g->sem); LOCK();
g->waitCount--;
}
// Once waitCount has reached nThreads - 1: free the dead blocks, release
// the waiters on the semaphore and clear the bottleneck flag.
if (g->waitCount >= g->nThreads - 1) {
freeDead();
if (g->waitCount) naSemUp(g->sem, g->waitCount);
g->bottleneck = 0;
}
}
void naGC()
@ -127,6 +210,29 @@ void naGC()
UNLOCK();
naCheckBottleneck();
}
// Collection entry point called by ThreadedGarbageCollector::process().
//
// Decides whether a full collection is worthwhile right now: if allocCount
// has dropped below the threshold, a GC is likely to be needed during the
// next frame, so it is pre-empted by running it here. Otherwise only the
// dead-block list is freed.
//
// Returns 1 when a full collection was requested, 0 when only dead blocks
// were freed.
int naGarbageCollect()
{
    // The threshold is based on observation - if it is too low the inline GC
    // will be used instead, which is fine occasionally.
    // GC can typically take between 5ms and 50ms (F-15, FG1000 PFD & MFD,
    // Advanced weather) - but usually it is completed prior to the start of
    // the next frame.
    enum { PREEMPTIVE_GC_THRESHOLD = 23000 };
    int gcDue;

    LOCK();
    // Sample the decision once: garbageCollect() resets needGC when it runs.
    gcDue = globals->allocCount < PREEMPTIVE_GC_THRESHOLD;
    globals->needGC = gcDue;
    if (gcDue)
        bottleneck();
    else
        bottleneckFreeDead();
    UNLOCK();
    naCheckBottleneck();
    return gcDue ? 1 : 0;
}
void naCheckBottleneck()
{
@ -207,7 +313,9 @@ static int poolsize(struct naPool* p)
while(b) { total += b->size; b = b->next; }
return total;
}
// Returns the current value of globals->allocCount.
// Declared extern "C" in ThreadedGarbageCollector.cpp so it can be read from there.
int GCglobalAlloc() {
return globals->allocCount;
}
struct naObj** naGC_get(struct naPool* p, int n, int* nout)
{
struct naObj** result;
@ -215,6 +323,7 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
LOCK();
while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) {
globals->needGC = 1;
printf("++");
bottleneck();
}
if(p->nfree == 0)
@ -227,51 +336,130 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
UNLOCK();
return result;
}
extern void na_t_stack_push(naRef v);
extern int na_t_stack_count();
extern naRef na_t_stack_pop();
static void markvec(naRef r)
static void oldmarkvec(naRef r)
{
int i;
struct VecRec* vr = PTR(r).vec->rec;
if(!vr) return;
for(i=0; i<vr->size; i++)
if (!vr) return;
for (i = 0; i<vr->size; i++)
mark(vr->array[i]);
}
// Sets the reference bit on the object, and recursively on all
// objects reachable from it. Uses the processor stack for recursion...
static void mark(naRef r)
static void oldmark(naRef r)
{
int i;
if(IS_NUM(r) || IS_NIL(r))
if (IS_NUM(r) || IS_NIL(r))
return;
if(PTR(r).obj->mark == 1)
if (PTR(r).obj->mark == 1)
return;
PTR(r).obj->mark = 1;
switch(PTR(r).obj->type) {
case T_VEC: markvec(r); break;
switch (PTR(r).obj->type) {
case T_VEC: oldmarkvec(r); break;
case T_HASH: naiGCMarkHash(r); break;
case T_CODE:
mark(PTR(r).code->srcFile);
for(i=0; i<PTR(r).code->nConstants; i++)
oldmark(PTR(r).code->srcFile);
for (i = 0; i<PTR(r).code->nConstants; i++)
mark(PTR(r).code->constants[i]);
break;
case T_FUNC:
mark(PTR(r).func->code);
mark(PTR(r).func->namespace);
mark(PTR(r).func->next);
oldmark(PTR(r).func->code);
oldmark(PTR(r).func->namespace);
oldmark(PTR(r).func->next);
break;
case T_GHOST:
mark(PTR(r).ghost->data);
oldmark(PTR(r).ghost->data);
break;
}
}
// Non-static entry point to the recursive marker: forwards r to oldmark().
// Used by oldnaiGCMarkHash() to mark hash keys and values.
void oldnaiGCMark(naRef r)
{
oldmark(r);
}
// Visitor passed to process_all() by mark(): sets the mark bit on r.
// Returns 1 when there is nothing to descend into (r is a number, nil, or
// already marked) and 0 when r has just been marked.
static int do_mark(naRef r)
{
    const int isObject = !(IS_NUM(r) || IS_NIL(r));

    if (isObject && PTR(r).obj->mark != 1) {
        PTR(r).obj->mark = 1;
        return 0;
    }
    return 1;
}
// Marks r and everything reachable from it, using the stack-based traversal
// unless nasal_gc_old selects the recursive implementation.
static void mark(naRef r)
{
    if (!nasal_gc_old) {
        process_all(r, do_mark);
        return;
    }
    oldmark(r);
}
// Queues v on the traversal stack and counts it in __elements_visited, so
// the diagnostic counter reflects every reference pushed from this file.
static void gc_push_counted(naRef v)
{
    na_t_stack_push(v);
    __elements_visited++;
}

// Iterative graph walk starting at r, driven by an explicit stack
// (na_t_stack_*).
//
// For every popped reference `process` is called first; a non-zero result
// means "do not descend" (do_mark() returns that for numbers, nil and
// objects that are already marked). Otherwise the object's children are
// queued according to its type.
static void process_all(naRef r, int (*process)(naRef r))
{
    gc_push_counted(r);

    while (na_t_stack_count() != 0) {
        naRef cur = na_t_stack_pop();
        int i;

        if ((*process)(cur))
            continue;

        switch (PTR(cur).obj->type) {
        case T_VEC: {
            struct VecRec* vr = PTR(cur).vec->rec;
            if (vr) {
                for (i = 0; i < vr->size; i++)
                    gc_push_counted(vr->array[i]);
            }
            break;
        }
        case T_HASH:
            // Defined outside this file; presumably hands each key and value
            // to naiGCMark() - confirm against the hash implementation.
            naiGCMarkHash(cur);
            break;
        case T_CODE:
            gc_push_counted(PTR(cur).code->srcFile);
            for (i = 0; i < PTR(cur).code->nConstants; i++)
                gc_push_counted(PTR(cur).code->constants[i]);
            break;
        case T_FUNC:
            gc_push_counted(PTR(cur).func->code);
            gc_push_counted(PTR(cur).func->namespace);
            gc_push_counted(PTR(cur).func->next);
            break;
        case T_GHOST:
            gc_push_counted(PTR(cur).ghost->data);
            break;
        }
    }
}
// Marking hook for code outside this file.
//
// With nasal_gc_old set, r is marked immediately through the recursive
// implementation. Otherwise r is only queued on the traversal stack and is
// marked when a process_all() loop pops it.
// NOTE(review): the queued path assumes the call happens while process_all()
// is draining the stack - confirm this holds for every caller.
void naiGCMark(naRef r)
{
    // Select on the same flag as mark(); testing the function name `oldmark`
    // would always be true and leave the queued path unreachable.
    if (nasal_gc_old) {
        oldnaiGCMark(r);
        return;
    }
    na_t_stack_push(r);
    __elements_visited++;
}
// Collects all the unreachable objects into a free list, and
@ -292,9 +480,9 @@ static void reap(struct naPool* p)
p->free = p->free0;
for(b = p->blocks; b; b = b->next)
for(elem=0; elem < b->size; elem++) {
for (elem = 0; elem < b->size; elem++) {
struct naObj* o = (struct naObj*)(b->block + elem * p->elemsz);
if(o->mark == 0)
if (o->mark == 0)
freeelem(p, o);
o->mark = 0;
}
@ -306,11 +494,18 @@ static void reap(struct naPool* p)
// Allocate more if necessary (try to keep 25-50% of the objects
// available)
if(p->nfree < total/4) {
//if(p->nfree < total/4) {
// int used = total - p->nfree;
// int avail = total - used;
// int need = used/2 - avail;
// if(need > 0)
// newBlock(p, need);
//}
if (p->nfree < total / 2) {
int used = total - p->nfree;
int avail = total - used;
int need = used/2 - avail;
if(need > 0)
int need = used / 1 - avail;
if (need > 0)
newBlock(p, need);
}
}

View File

@ -176,6 +176,17 @@ void naiGCMarkHash(naRef hash)
}
}
// Recursive-marker counterpart of naiGCMarkHash(): walks every cell of the
// hash record and passes the key and value of each occupied cell (table
// entry >= 0) to oldnaiGCMark(). A hash without a record is left untouched.
void oldnaiGCMarkHash(naRef hash)
{
    HashRec* rec = REC(hash);
    int cell;

    if (!rec)
        return;

    for (cell = 0; cell < NCELLS(rec); cell++) {
        if (TAB(rec)[cell] < 0)
            continue;
        oldnaiGCMark(ENTS(rec)[TAB(rec)[cell]].key);
        oldnaiGCMark(ENTS(rec)[TAB(rec)[cell]].val);
    }
}
static void tmpStr(naRef* out, struct naStr* str, const char* key)
{
str->type = T_STR;

View File

@ -295,28 +295,31 @@ static naRef f_die(naContext c, naRef me, int argc, naRef* args)
return naNil(); // never executes
}
// Wrapper around vsnprintf that allocates exactly the required size:
// a first call with a null buffer and size 0 measures the number of
// characters needed, then a buffer of that size (plus the terminating
// null) is allocated and filled by a second call.
// If the measuring call reports nothing to write (or an error) an empty
// string is returned. The returned buffer should be freed by the caller.
static char* dosprintf(char* f, ...)
{
    char* buf;
    int len;
    va_list va;
    va_list vaMeasure;

    va_start(va, f);

    // The measuring pass consumes its argument list, so it works on a copy
    // and the untouched original is kept for the formatting pass.
    va_copy(vaMeasure, va);
    len = vsnprintf(0, 0, f, vaMeasure);
    va_end(vaMeasure);

    if (len <= 0) {
        buf = naAlloc(2);
        *buf = 0;
    }
    else {
        len++; // allow for terminating null
        buf = naAlloc(len);
        vsnprintf(buf, len, f, va);
    }
    va_end(va);
    return buf;
}
// Inspects a printf format string f, and finds the next "%..." format

View File

@ -65,6 +65,7 @@ naRef naStringValue(naContext c, naRef r)
naRef naNew(struct Context* c, int type)
{
//getSource(c);
naRef result;
if(c->nfree[type] == 0)
c->free[type] = naGC_get(&globals->pools[type],

View File

@ -65,6 +65,7 @@ namespace simgear
bool use_tree_shadows;
bool use_tree_normals;
//OpenThreads::ReentrantMutex treeAddMutex;
// Tree instance scheme:
// vertex - local position of quad vertex.
@ -325,6 +326,15 @@ struct QuadTreeCleaner : public osg::NodeVisitor
QuadTreeCleaner() : NodeVisitor(NodeVisitor::TRAVERSE_ALL_CHILDREN)
{
}
// Removes every child recorded in cleanupList from the LOD it was recorded
// with (apply() fills the list instead of removing children on the spot).
virtual ~QuadTreeCleaner()
{
    for (auto entry = cleanupList.begin(); entry != cleanupList.end(); ++entry) {
        Node* child = entry->first;
        LOD* parent = entry->second;
        parent->removeChild(child);
    }
}
std::map<Node*, LOD*> cleanupList;
// lod.removeChildren(i, 1);
void apply(LOD& lod)
{
for (int i = lod.getNumChildren() - 1; i >= 0; --i) {
@ -347,7 +357,7 @@ struct QuadTreeCleaner : public osg::NodeVisitor
}
}
if (geodeEmpty)
lod.removeChildren(i, 1);
cleanupList[lod.getChild(i)] = &lod;
}
}
};
@ -359,6 +369,7 @@ struct QuadTreeCleaner : public osg::NodeVisitor
osg::Group* createForest(SGTreeBinList& forestList, const osg::Matrix& transform,
const SGReaderWriterOptions* options)
{
// OpenThreads::ScopedLock<OpenThreads::ReentrantMutex> lock(treeAddMutex);
Matrix transInv = Matrix::inverse(transform);
static Matrix ident;
// Set up some shared structures.

View File

@ -345,3 +345,13 @@ int SGTimeStamp::elapsedUSec() const
return static_cast<int>((now - *this).toUSecs());
}
extern "C" {

// Single shared timestamp behind the C-callable helpers below.
SGTimeStamp global_timestamp;

// Calls stamp() on the shared timestamp.
void global_stamp()
{
    global_timestamp.stamp();
}

// Returns elapsedUSec() of the shared timestamp, i.e. the microseconds
// between its stored time and now.
int global_elapsedUSec()
{
    const int usec = global_timestamp.elapsedUSec();
    return usec;
}

}