Added background (threaded) garbage collector

This commit is contained in:
Richard Harrison 2019-05-13 15:21:08 +02:00
parent b3ef2478f5
commit 7354201b5d
7 changed files with 554 additions and 63 deletions

View File

@ -28,6 +28,7 @@ set(SOURCES
code.h
data.h
parse.h
ThreadedGarbageCollector.cpp
)
simgear_component(nasal nasal "${SOURCES}" "${HEADERS}")

View File

@ -0,0 +1,176 @@
//#include "nasal.h"
//#include "data.h"
//#include "code.h"
#include <simgear/timing/timestamp.hxx>
#include <simgear/threads/SGThread.hxx>
#include <simgear/debug/logstream.hxx>
#include <mutex>
#include <condition_variable>
#include <atomic>
extern "C" {
extern int __bg_gc;
extern int GCglobalAlloc();
extern int naGarbageCollect();
}
/**
 * Thread that runs process() once for every release() issued by its owner.
 *
 * Hand-off protocol, as implemented below:
 *  - release()         (owner)  flags work as ready and wakes the worker;
 *  - wait()            (worker) sleeps until work is ready or terminate()
 *                               has been requested;
 *  - setCompletion()   (worker) flags the job as finished and wakes the owner;
 *  - awaitCompletion() (owner)  sleeps until the current job has finished.
 *
 * `dataReady` is raised under `mutex_`/`condVar`; `complete` is raised under
 * `Cmutex_`/`CcondVar`.
 */
class SGExclusiveThread : public SGThread
{
private:
    std::mutex mutex_;                  // pairs with condVar (work hand-off)
    std::condition_variable condVar;
    SGTimeStamp timestamp;              // used to time awaitCompletion()
    std::mutex Cmutex_;                 // pairs with CcondVar (completion hand-off)
    std::condition_variable CcondVar;
    // Both flags are touched by the owner and by the worker, so they must be
    // atomic just like the other state flags.
    std::atomic<bool> _started;
    std::atomic<bool> _terminated;
    int last_await_time;                // usec spent in the last awaitCompletion()
    std::atomic<bool> dataReady;        // a job has been released and not yet finished
    std::atomic<bool> complete;         // no job is outstanding
    std::atomic<bool> process_ran;      // last process() returned non-zero
    std::atomic<bool> process_running;  // run() is currently executing

public:
    SGExclusiveThread() :
        _started(false), _terminated(false), last_await_time(0),
        dataReady(false), complete(true), process_ran(false), process_running(false)
    {
    }

    virtual ~SGExclusiveThread()
    {
    }

    /// Hand one job to the worker. Logs and does nothing when the previous
    /// job has not completed yet.
    void release() {
        std::unique_lock<std::mutex> lck(mutex_);
        if (!complete) {
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] not finished - skipping");
            return;
        }
        if (!complete.exchange(false))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (2)");
        if (dataReady.exchange(true))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] concurrent failure (1)");
        condVar.notify_one();
    }

    /// Block until a job has been released or termination was requested.
    /// Callers that care which of the two happened must inspect the flags.
    void wait() {
        std::unique_lock<std::mutex> lck(mutex_);
        condVar.wait(lck, [this] { return dataReady.load() || _terminated.load(); });
    }

    void clearAwaitCompletionTime() {
        last_await_time = 0;
    }

    /// Block until the outstanding job (if any) has finished; when that job
    /// reported that it did work, record and print how long we waited.
    virtual void awaitCompletion() {
        timestamp.stamp();
        std::unique_lock<std::mutex> lck(Cmutex_);
        CcondVar.wait(lck, [this] { return complete.load(); });
        if (process_ran) {
            last_await_time = timestamp.elapsedUSec();
            printf("await %5.1f ", last_await_time / 1000.0);
            process_ran = false;
        }
    }

    /// Mark the current job as finished and wake awaitCompletion().
    void setCompletion() {
        std::unique_lock<std::mutex> lck(Cmutex_);
        if (!dataReady.exchange(false))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on dataReady failed (5)\n");
        if (complete.exchange(true))
            SG_LOG(SG_NASAL, SG_ALERT, "[SGExclusiveThread] atomic operation on complete failed (5)\n");
        CcondVar.notify_one();
    }

    /// One unit of work; a non-zero return is remembered in process_ran.
    virtual int process() = 0;

    /// Worker loop: wait for a job, run it, signal completion; repeat until
    /// termination is requested.
    virtual void run()
    {
        process_running = true;
        while (!_terminated) {
            wait();
            // Woken by terminate() with nothing queued: leave without running
            // process(). A job that was already released is still executed so
            // that a thread blocked in awaitCompletion() is always woken.
            if (!dataReady)
                break;
            process_ran = (process() != 0);
            setCompletion();
        }
        process_running = false;
        _terminated = false;
        _started = false;
    }

    /// Ask the worker to exit. The flag is set under mutex_ and the condition
    /// variable is notified so a worker sleeping in wait() actually wakes up
    /// (previously it would sleep until the next release()).
    void terminate() {
        {
            std::lock_guard<std::mutex> lck(mutex_);
            _terminated = true;
        }
        condVar.notify_all();
    }

    bool stop()
    {
        return true;
    }

    /// Start the worker if it has not been started (or has exited).
    void ensure_running()
    {
        if (!_started.exchange(true))
        {
            // Drop a terminate() request that arrived while no worker was
            // running; otherwise the new worker would exit immediately.
            _terminated = false;
            start();
        }
    }

    bool is_running()
    {
        return process_running;
    }
};
// Exclusive worker whose job is a single call to naGarbageCollect().
class ThreadedGarbageCollector : public SGExclusiveThread
{
public:
    ThreadedGarbageCollector() {}

    virtual ~ThreadedGarbageCollector() {}

    // Invoked on the worker thread for every released job; the collector's
    // return value is passed straight back to the worker loop.
    virtual int process()
    {
        const int outcome = naGarbageCollect();
        return outcome;
    }
};
// The single background collector instance driven by the C entry points below.
ThreadedGarbageCollector gct;

extern "C" {

// Starts the collector thread unless it has already been started.
void startNasalBackgroundGarbageCollection()
{
    gct.ensure_running();
}

// Requests that the collector thread leaves its loop.
void stopNasalBackgroundGarbageCollection()
{
    gct.terminate();
}

// Releases one collection job; a no-op while the thread is not running.
void performNasalBackgroundGarbageCollection()
{
    if (!gct.is_running())
        return;
    gct.release();
}

// With can_wait set, blocks until the released job has finished; otherwise
// only resets the recorded wait time. Does nothing while the thread is not
// running.
void awaitNasalGarbageCollectionComplete(bool can_wait)
{
    if (!gct.is_running())
        return;

    if (!can_wait) {
        gct.clearAwaitCompletionTime();
        return;
    }
    gct.awaitCompletion();
}

}

View File

@ -24,21 +24,21 @@ struct Globals* globals = 0;
static naRef bindFunction(naContext ctx, struct Frame* f, naRef code);
char __name[3000] = { 0 };
int init = 0;
void getSource(struct Context* c) {
naRef v = naGetSourceFile(c, 0);
init = 1;
if (!IS_NIL(v))
snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
else
*__name = 0;
}
char *getName() {
if (init)
return __name;
return "**";
}
//char __name[3000] = { 0 };
//int init = 0;
//void getSource(struct Context* c) {
// naRef v = naGetSourceFile(c, 0);
// init = 1;
// if (!IS_NIL(v))
// snprintf(__name, 3000, "%s:%d", naStr_data(v), naGetLine(c, 0));
// else
// *__name = 0;
//}
//char *getName() {
// if (init)
// return __name;
// return "**";
//}
#define ERR(c, msg) naRuntimeError((c),(msg))
void naRuntimeError(naContext c, const char* fmt, ...)
{
@ -172,7 +172,7 @@ static void initContext(naContext c)
c->error[0] = 0;
c->userData = 0;
}
#define BASE_SIZE 256000
static void initGlobals()
{
int i;
@ -183,10 +183,10 @@ static void initGlobals()
globals->sem = naNewSem();
globals->lock = naNewLock();
globals->allocCount = 256; // reasonable starting value
globals->allocCount = BASE_SIZE; // reasonable starting value
for(i=0; i<NUM_NASAL_TYPES; i++)
naGC_init(&(globals->pools[i]), i);
globals->deadsz = 256;
globals->deadsz = BASE_SIZE;
globals->ndead = 0;
globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
@ -320,7 +320,7 @@ static void checkNamedArgs(naContext ctx, struct naCode* c, struct naHash* h)
static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named)
{
getSource(ctx);
//getSource(ctx);
naRef *args, func, code, obj = naNil();
struct Frame* f;
int opf = ctx->opTop - nargs;
@ -351,8 +351,9 @@ static struct Frame* setupFuncall(naContext ctx, int nargs, int mcall, int named
f->ip = 0;
f->bp = ctx->opFrame;
if(mcall) naHash_set(f->locals, globals->meRef, obj);
if (mcall) {
naHash_set(f->locals, globals->meRef, obj);
}
if(named) checkNamedArgs(ctx, PTR(code).code, PTR(f->locals).hash);
else setupArgs(ctx, f, args, nargs);
@ -921,8 +922,9 @@ naRef naCall(naContext ctx, naRef func, int argc, naRef* args,
func = naNewFunc(ctx, func);
PTR(func).func->namespace = locals;
}
if(!IS_NIL(obj))
if (!IS_NIL(obj)) {
naHash_set(locals, globals->meRef, obj);
}
ctx->opTop = ctx->markTop = 0;
ctx->fTop = 1;

View File

@ -25,6 +25,12 @@
#include <boost/function.hpp>
#include <simgear/threads/SGThread.hxx>
#include <mutex>
#include <condition_variable>
#include <atomic>
namespace nasal
{
//----------------------------------------------------------------------------
@ -123,4 +129,116 @@ namespace nasal
);
}
/**
 * Small growable LIFO stack backed by a single malloc/realloc buffer.
 *
 * Elements are moved with realloc() and never constructed or destroyed, so T
 * has to be a plain-data type. No locking is performed.
 *
 * NOTE(review): iterate() and push_if_not_present() are declared in terms of
 * naRef rather than T, so the template is only usable with T = naRef as
 * written.
 */
template <class T> class FastStack
{
public:
    T* st;              // element storage (malloc/realloc), owned by this object
    int allocationSize; // number of elements st has room for
    int lastIndex;      // index of the top element; -1 when the stack is empty
    //std::mutex mutex_;
public:
    FastStack(int stackSize);
    ~FastStack();

    // Copying would leave two objects releasing the same buffer.
    FastStack(const FastStack&) = delete;
    FastStack& operator=(const FastStack&) = delete;

    inline void resize(int newSize);
    inline void push(T x);
    inline void pop();
    inline void clear();
    inline void iterate(int(*process)(naRef v));

    /// Number of elements currently on the stack.
    inline size_t size() {
        return lastIndex + 1;
    }

    /// Top element. Precondition: the stack is not empty.
    T top()
    {
        //std::unique_lock<std::mutex> lck(mutex_);
        return st[lastIndex];
    }

    void push_if_not_present(naRef r);
};

/// Creates an empty stack with room for stackSize elements. If the initial
/// allocation cannot be made the stack starts with no capacity and the first
/// push() attempts the allocation again.
template <class T>
FastStack<T>::FastStack(int stackSize)
{
    st = NULL;
    allocationSize = 0;
    lastIndex = -1;
    if (stackSize > 0) {
        // Sized by T (was sizeof(naRef), which is only right for T = naRef).
        st = (T*)malloc(stackSize * sizeof(T));
        if (st)
            allocationSize = stackSize;
    }
}

template <class T>
FastStack<T>::~FastStack()
{
    // The buffer comes from malloc/realloc, so it must be released with
    // free(); delete[] on it is undefined behaviour.
    free(st);
}

/// Forgets all elements; the capacity is kept.
template <class T>
void FastStack<T>::clear()
{
    lastIndex = -1;
}

/// Currently identical to push(): the duplicate scan is disabled.
template <class T>
void FastStack<T>::push_if_not_present(naRef r) {
    /*for (int i = 0; i <= lastIndex; i++)
    if (st[i] == r)
    return;*/
    push(r);
}

/// Calls process on each element from bottom to top, stopping at the first
/// call that returns non-zero.
template <class T>
void FastStack<T>::iterate(int(*process)(naRef v))
{
    for (int i = 0; i <= lastIndex; i++)
        if (process(st[i]))
            break;
}

/// Drops the top element; does nothing when the stack is already empty.
template <class T>
void FastStack<T>::pop()
{
    if (lastIndex >= 0)
        --lastIndex;
}

/// Pushes x, doubling the capacity when the buffer is full.
template <class T>
void FastStack<T>::push(T x)
{
    // Grow before touching lastIndex so that a throwing resize() leaves the
    // stack unchanged; a zero capacity cannot be doubled, so start from 16.
    if (lastIndex + 1 >= allocationSize)
        resize(allocationSize > 0 ? allocationSize * 2 : 16);
    st[++lastIndex] = x;
}

/// Changes the capacity to newSize elements.
/// Throws a string literal (const char*) when newSize could not hold the
/// elements already stored or when the reallocation fails; the stack is left
/// untouched in both cases.
template <class T>
void FastStack<T>::resize(int newSize)
{
    //std::unique_lock<std::mutex> lck(mutex_);
    if (newSize <= 0 || newSize <= lastIndex)
        throw "Failed to grow tc stack";
    T* new_st = (T*)realloc(st, newSize * sizeof(T));
    if (new_st)
    {
        st = new_st;
        allocationSize = newSize;
        SG_LOG(SG_NASAL, SG_WARN, "Increased tc stack to " << allocationSize);
    }
    else
        throw "Failed to grow tc stack";
}
// Stack shared by the C entry points below; starts with room for 40 refs.
FastStack<naRef> t_stack(40);

extern "C" {

// Largest depth t_stack has reached since this counter was last reset.
int __stack_hwm = 0;

// Pushes v and updates the high-water mark.
void na_t_stack_push(naRef v)
{
    t_stack.push(v);
    const size_t depth = t_stack.size();
    if (depth > __stack_hwm)
        __stack_hwm = depth;
}

// Number of refs currently on the stack.
extern int na_t_stack_count()
{
    return t_stack.size();
}

// Removes and returns the top ref.
extern naRef na_t_stack_pop()
{
    naRef popped = t_stack.top();
    t_stack.pop();
    return popped;
}

}
} // namespace nasal

View File

@ -5,6 +5,8 @@
static void reap(struct naPool* p);
static void mark(naRef r);
static void process_all(naRef r, int(*process)(naRef r));
struct Block {
int size;
@ -14,14 +16,15 @@ struct Block {
// Must be called with the giant exclusive lock!
extern void global_stamp();
extern int global_elapsedUSec();
extern char *getName();
static void freeDead()
int nasal_gc_old = 0;
static int freeDead()
{
int i;
for(i=0; i<globals->ndead; i++)
naFree(globals->deadBlocks[i]);
globals->ndead = 0;
printf("--> freedead (%d) : %d", i, global_elapsedUSec());
return i;
}
static void marktemps(struct Context* c)
@ -34,54 +37,105 @@ static void marktemps(struct Context* c)
}
}
int __elements_visited = 0;
extern int __stack_hwm;
int busy=0;
// Must be called with the big lock!
static void garbageCollect()
{
if (busy)
return;
busy = 1;
int i;
struct Context* c;
globals->allocCount = 0;
c = globals->allContexts;
while(c) {
for(i=0; i<NUM_NASAL_TYPES; i++)
int ctxc = 0;
__elements_visited = 0;
__stack_hwm = 0;
int st = global_elapsedUSec();
int et = 0;
int stel = __elements_visited;
int eel = 0;
c = globals->allContexts;
while (c) {
ctxc++;
for (i = 0; i < NUM_NASAL_TYPES; i++)
c->nfree[i] = 0;
for(i=0; i < c->fTop; i++) {
for (i = 0; i < c->fTop; i++) {
mark(c->fStack[i].func);
mark(c->fStack[i].locals);
}
for(i=0; i < c->opTop; i++)
for (i = 0; i < c->opTop; i++)
mark(c->opStack[i]);
mark(c->dieArg);
marktemps(c);
c = c->nextAll;
}
printf("--> garbageCollect: %d ", global_elapsedUSec());
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("--> garbageCollect(#e%-5d): %-4d ", eel, et);
mark(globals->save);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("s(%5d) %-5d ", eel, et);
mark(globals->save_hash);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
printf("h(%5d) %-5d ", eel, et);
mark(globals->symbols);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("sy(%5d) %-4d ", eel, et);
mark(globals->meRef);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("me(%5d) %-5d ", eel, et);
mark(globals->argRef);
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf("ar(%5d) %-5d ", eel, et);
mark(globals->parentsRef);
printf("m> %d", global_elapsedUSec());
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
eel = __elements_visited - stel; stel = __elements_visited;
//printf(" ev[%3d] %-5d", eel, et);
// Finally collect all the freed objects
for (i = 0; i < NUM_NASAL_TYPES; i++) {
reap(&(globals->pools[i]));
printf(" p(%d)> %d", i, global_elapsedUSec());
}
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
printf(" >> reap %-5d", et);
// Make enough space for the dead blocks we need to free during
// execution. This works out to 1 spot for every 2 live objects,
// which should be limit the number of bottleneck operations
// without imposing an undue burden of extra "freeable" memory.
if(globals->deadsz < globals->allocCount) {
globals->deadsz = globals->allocCount;
if(globals->deadsz < 256) globals->deadsz = 256;
if(globals->deadsz < 256000) globals->deadsz = 256000;
naFree(globals->deadBlocks);
globals->deadBlocks = naAlloc(sizeof(void*) * globals->deadsz);
}
globals->needGC = 0;
printf(">> %d ", global_elapsedUSec());
et = global_elapsedUSec() - st;
st = global_elapsedUSec();
printf(">> %-5d ", et);
busy = 0;
}
void naModLock()
@ -119,16 +173,33 @@ static void bottleneck()
UNLOCK(); naSemDown(g->sem); LOCK();
g->waitCount--;
}
printf("bottleneck wait finished %d usec", global_elapsedUSec());
printf("GC: wait %2d ", global_elapsedUSec());
if(g->waitCount >= g->nThreads - 1) {
freeDead();
//if(g->needGC)
int fd = freeDead();
printf("--> freedead (%5d) : %5d", fd, global_elapsedUSec());
if(g->needGC)
garbageCollect();
if(g->waitCount) naSemUp(g->sem, g->waitCount);
g->bottleneck = 0;
}
char *c = getName();
printf("bottleneck finished: %d %s\n", global_elapsedUSec(), c);
printf(" :: finished: %5d\n", global_elapsedUSec());
}
// Rendezvous that only releases the queued dead blocks: it never calls
// garbageCollect().
// NOTE(review): it UNLOCK()s before sleeping and LOCK()s afterwards, so it
// presumably has to be entered with the global lock held - confirm against
// the callers.
static void bottleneckFreeDead()
{
global_stamp();
struct Globals* g = globals;
// Raise the bottleneck flag, then sleep on the semaphore for as long as the
// flag is set and fewer than nThreads-1 threads are counted as waiting.
g->bottleneck = 1;
while (g->bottleneck && g->waitCount < g->nThreads - 1) {
g->waitCount++;
UNLOCK(); naSemDown(g->sem); LOCK();
g->waitCount--;
}
// Once enough threads are waiting: free the dead blocks, wake every waiter
// and clear the flag.
if (g->waitCount >= g->nThreads - 1) {
freeDead();
if (g->waitCount) naSemUp(g->sem, g->waitCount);
g->bottleneck = 0;
}
}
void naGC()
@ -139,6 +210,29 @@ void naGC()
UNLOCK();
naCheckBottleneck();
}
// Entry point for a pre-emptive collection.
// Returns 1 when a collection was requested through bottleneck(), 0 when only
// the dead blocks were freed.
int naGarbageCollect()
{
    int collected;

    LOCK();
    // The threshold is based on observation. Comparing allocCount against it
    // estimates whether a collection is likely to be needed during the next
    // frame; if so it is done now instead. When the check does not trigger,
    // the inline GC is used later, which is fine occasionally.
    // A collection typically takes 5ms to 50ms (F-15, FG1000 PFD & MFD,
    // Advanced weather) and usually finishes before the next frame starts.
    globals->needGC = nasal_globals->allocCount < 23000;
    if (!globals->needGC) {
        bottleneckFreeDead();
        collected = 0;
    } else {
        bottleneck();
        collected = 1;
    }
    UNLOCK();

    naCheckBottleneck();
    return collected;
}
void naCheckBottleneck()
{
@ -219,7 +313,9 @@ static int poolsize(struct naPool* p)
while(b) { total += b->size; b = b->next; }
return total;
}
// Exposes the current value of globals->allocCount.
int GCglobalAlloc()
{
    int count = globals->allocCount;
    return count;
}
struct naObj** naGC_get(struct naPool* p, int n, int* nout)
{
struct naObj** result;
@ -227,6 +323,7 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
LOCK();
while(globals->allocCount < 0 || (p->nfree == 0 && p->freetop >= p->freesz)) {
globals->needGC = 1;
printf("++");
bottleneck();
}
if(p->nfree == 0)
@ -239,51 +336,130 @@ struct naObj** naGC_get(struct naPool* p, int n, int* nout)
UNLOCK();
return result;
}
extern void na_t_stack_push(naRef v);
extern int na_t_stack_count();
extern naRef na_t_stack_pop();
static void markvec(naRef r)
static void oldmarkvec(naRef r)
{
int i;
struct VecRec* vr = PTR(r).vec->rec;
if(!vr) return;
for(i=0; i<vr->size; i++)
if (!vr) return;
for (i = 0; i<vr->size; i++)
mark(vr->array[i]);
}
// Sets the reference bit on the object, and recursively on all
// objects reachable from it. Uses the processor stack for recursion...
static void mark(naRef r)
static void oldmark(naRef r)
{
int i;
if(IS_NUM(r) || IS_NIL(r))
if (IS_NUM(r) || IS_NIL(r))
return;
if(PTR(r).obj->mark == 1)
if (PTR(r).obj->mark == 1)
return;
PTR(r).obj->mark = 1;
switch(PTR(r).obj->type) {
case T_VEC: markvec(r); break;
switch (PTR(r).obj->type) {
case T_VEC: oldmarkvec(r); break;
case T_HASH: naiGCMarkHash(r); break;
case T_CODE:
mark(PTR(r).code->srcFile);
for(i=0; i<PTR(r).code->nConstants; i++)
oldmark(PTR(r).code->srcFile);
for (i = 0; i<PTR(r).code->nConstants; i++)
mark(PTR(r).code->constants[i]);
break;
case T_FUNC:
mark(PTR(r).func->code);
mark(PTR(r).func->namespace);
mark(PTR(r).func->next);
oldmark(PTR(r).func->code);
oldmark(PTR(r).func->namespace);
oldmark(PTR(r).func->next);
break;
case T_GHOST:
mark(PTR(r).ghost->data);
oldmark(PTR(r).ghost->data);
break;
}
}
// Non-static entry point to the recursive marker: forwards r to oldmark().
void oldnaiGCMark(naRef r)
{
oldmark(r);
}
// Visit callback for process_all(): sets the mark bit on r.
// Returns 0 when r was newly marked (its children still need visiting) and 1
// when r is a number, nil, or already marked.
static int do_mark(naRef r)
{
    if (!IS_NUM(r) && !IS_NIL(r) && PTR(r).obj->mark != 1) {
        PTR(r).obj->mark = 1;
        return 0;
    }
    return 1;
}
// Marks r and everything reachable from it, using the explicit-stack walker
// unless nasal_gc_old selects the recursive one.
static void mark(naRef r)
{
    if (!nasal_gc_old) {
        process_all(r, do_mark);
        return;
    }
    oldmark(r);
}
// Walks the object graph below r with an explicit stack (na_t_stack_*)
// instead of recursing.
//   r       - root reference to start from
//   process - called once per popped reference; a non-zero result means
//             "do not look at this reference's children"
// __elements_visited is bumped for the pushes below (the srcFile push of a
// code object is not counted).
static void process_all(naRef r, int (*process)(naRef r))
{
na_t_stack_push(r);
__elements_visited++;
// Keep going until the stack is empty again.
while (na_t_stack_count() != 0)
{
// Note: this local shadows the parameter r.
naRef r = na_t_stack_pop();
if ((*process)(r))
continue;
// Queue the children according to the object's type.
switch (PTR(r).obj->type) {
case T_VEC: {
int i;
struct VecRec* vr = PTR(r).vec->rec;
if (vr) {
for (i = 0; i < vr->size; i++) {
na_t_stack_push(vr->array[i]);
__elements_visited++;
}
}
break;
}
// Hashes are not expanded here; their traversal is delegated to
// naiGCMarkHash().
case T_HASH: naiGCMarkHash(r); break;
case T_CODE:
{
int i;
na_t_stack_push(PTR(r).code->srcFile);
for (i = 0; i < PTR(r).code->nConstants; i++) {
na_t_stack_push(PTR(r).code->constants[i]);
__elements_visited++;
}
break;
}
case T_FUNC:
__elements_visited++;
__elements_visited++;
__elements_visited++;
na_t_stack_push(PTR(r).func->code);
na_t_stack_push(PTR(r).func->namespace);
na_t_stack_push(PTR(r).func->next);
break;
case T_GHOST:
na_t_stack_push(PTR(r).ghost->data);
__elements_visited++;
break;
}
}
}
// Marks r on behalf of code outside this file.
// With nasal_gc_old set the recursive marker is used; otherwise r is queued
// on the explicit mark stack, whose entries are consumed by process_all().
void naiGCMark(naRef r)
{
    // Select on the nasal_gc_old flag. Testing `oldmark` here compared a
    // function's address against zero, which is always true and made the
    // queueing branch unreachable.
    if (nasal_gc_old)
        oldnaiGCMark(r);
    else {
        na_t_stack_push(r);
        __elements_visited++;
    }
}
// Collects all the unreachable objects into a free list, and
@ -304,9 +480,9 @@ static void reap(struct naPool* p)
p->free = p->free0;
for(b = p->blocks; b; b = b->next)
for(elem=0; elem < b->size; elem++) {
for (elem = 0; elem < b->size; elem++) {
struct naObj* o = (struct naObj*)(b->block + elem * p->elemsz);
if(o->mark == 0)
if (o->mark == 0)
freeelem(p, o);
o->mark = 0;
}
@ -318,11 +494,18 @@ static void reap(struct naPool* p)
// Allocate more if necessary (try to keep 25-50% of the objects
// available)
if(p->nfree < total/4) {
//if(p->nfree < total/4) {
// int used = total - p->nfree;
// int avail = total - used;
// int need = used/2 - avail;
// if(need > 0)
// newBlock(p, need);
//}
if (p->nfree < total / 2) {
int used = total - p->nfree;
int avail = total - used;
int need = used/2 - avail;
if(need > 0)
int need = used / 1 - avail;
if (need > 0)
newBlock(p, need);
}
}

View File

@ -176,6 +176,17 @@ void naiGCMarkHash(naRef hash)
}
}
// Recursive-marker counterpart of naiGCMarkHash(): passes the key and the
// value of every table cell holding a non-negative entry index to
// oldnaiGCMark(). A hash without a record is left alone.
void oldnaiGCMarkHash(naRef hash)
{
    HashRec* hr = REC(hash);
    int cell;

    if (!hr)
        return;

    for (cell = 0; cell < NCELLS(hr); cell++) {
        if (TAB(hr)[cell] < 0)
            continue;
        oldnaiGCMark(ENTS(hr)[TAB(hr)[cell]].key);
        oldnaiGCMark(ENTS(hr)[TAB(hr)[cell]].val);
    }
}
static void tmpStr(naRef* out, struct naStr* str, const char* key)
{
str->type = T_STR;

View File

@ -65,7 +65,7 @@ naRef naStringValue(naContext c, naRef r)
naRef naNew(struct Context* c, int type)
{
getSource(c);
//getSource(c);
naRef result;
if(c->nfree[type] == 0)
c->free[type] = naGC_get(&globals->pools[type],