TerraSync: switch to per-directory hash caching
Avoid the central hash cache becoming enormous now that we use the selective download scheme for the tile dirs. The hash-cache file name is changed (from .hashes at the repository root to a per-directory .dirhash) so the new scheme can co-exist with older versions.
parent 990c53fc9c
commit 8d6d671cc1
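Each directory now keeps its own cache in a .dirhash file beside its .dirindex, instead of one repository-wide .hashes file. As the writeHashCache()/parseHashCache() code below shows, each line stores filePath*modTime*lengthBytes*hashHex; for example (hypothetical values):

    /home/user/TerraSync/Terrain/e000n40/e005n47/3105316.stg*1584382112*20680*66a53c4e2f8f2951d4f0f30bd2f2c3b98753e6b1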
@@ -81,6 +81,45 @@ std::string innerResultCodeAsString(HTTPRepository::ResultCode code) {
     return "Unknown response code";
 }
 
+struct HashCacheEntry {
+    std::string filePath;
+    time_t modTime;
+    size_t lengthBytes;
+    std::string hashHex;
+};
+
+using HashCache = std::unordered_map<std::string, HashCacheEntry>;
+
+std::string computeHashForPath(const SGPath& p)
+{
+    if (!p.exists())
+        return {};
+
+    sha1nfo info;
+    sha1_init(&info);
+
+    const int bufSize = 1024 * 1024;
+    char* buf = static_cast<char*>(malloc(bufSize));
+    if (!buf) {
+        throw sg_io_exception("Couldn't allocate SHA1 computation buffer");
+    }
+
+    size_t readLen;
+    SGBinaryFile f(p);
+    if (!f.open(SG_IO_IN)) {
+        free(buf);
+        throw sg_io_exception("Couldn't open file for compute hash", p);
+    }
+    while ((readLen = f.read(buf, bufSize)) > 0) {
+        sha1_write(&info, buf, readLen);
+    }
+
+    f.close();
+    free(buf);
+    std::string hashBytes((char*)sha1_result(&info), HASH_LENGTH);
+    return strutils::encodeHex(hashBytes);
+}
+
 } // namespace
 
 class HTTPDirectory
@@ -112,6 +151,9 @@ class HTTPDirectory
     typedef std::vector<ChildInfo> ChildInfoList;
     ChildInfoList children;
 
+    mutable HashCache hashes;
+    mutable bool hashCacheDirty = false;
+
 public:
     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
         _repository(repo),
@@ -125,6 +167,8 @@ public:
             // already exists on disk
             parseDirIndex(children);
             std::sort(children.begin(), children.end());
+
+            parseHashCache();
         } catch (sg_exception&) {
             // parsing cache failed
             children.clear();
@@ -150,7 +194,7 @@ public:
     {
         SGPath fpath(absolutePath());
         fpath.append(".dirindex");
-        _repository->updatedFileContents(fpath, hash);
+        updatedFileContents(fpath, hash);
 
         children.clear();
         parseDirIndex(children);
@@ -179,20 +223,20 @@ public:
         size_t bufSize = 0;
 
         for (const auto& child : children) {
-            if (child.type != HTTPRepository::FileType)
-                continue;
+            if (child.type != HTTPRepository::FileType)
+                continue;
 
-            if (child.path.exists())
-                continue;
+            if (child.path.exists())
+                continue;
 
-            SGPath cp = _repository->installedCopyPath;
-            cp.append(relativePath());
-            cp.append(child.name);
-            if (!cp.exists()) {
-                continue;
-            }
+            SGPath cp = _repository->installedCopyPath;
+            cp.append(relativePath());
+            cp.append(child.name);
+            if (!cp.exists()) {
+                continue;
+            }
 
-            SGBinaryFile src(cp);
+            SGBinaryFile src(cp);
             SGBinaryFile dst(child.path);
             src.open(SG_IO_IN);
             dst.open(SG_IO_OUT);
@@ -388,7 +432,7 @@ public:
                     _relativePath + "/" + file,
                     HTTPRepository::REPO_ERROR_CHECKSUM);
             } else {
-                _repository->updatedFileContents(it->path, hash);
+                updatedFileContents(it->path, hash);
                 _repository->updatedChildSuccessfully(_relativePath + "/" +
                                                       file);
 
@@ -453,6 +497,50 @@ public:
         fpath.append(file);
         _repository->failedToUpdateChild(fpath, status);
     }
 
+    std::string hashForPath(const SGPath& p) const
+    {
+        const auto ps = p.utf8Str();
+        auto it = hashes.find(ps);
+        if (it != hashes.end()) {
+            const auto& entry = it->second;
+            // ensure data on disk hasn't changed.
+            // we could also use the file type here if we were paranoid
+            if ((p.sizeInBytes() == entry.lengthBytes) && (p.modTime() == entry.modTime)) {
+                return entry.hashHex;
+            }
+
+            // entry in the cache, but it's stale so remove and fall through
+            hashes.erase(it);
+        }
+
+        std::string hash = computeHashForPath(p);
+        updatedFileContents(p, hash);
+        return hash;
+    }
+
+    bool isHashCacheDirty() const
+    {
+        return hashCacheDirty;
+    }
+
+    void writeHashCache() const
+    {
+        if (!hashCacheDirty)
+            return;
+
+        hashCacheDirty = false;
+
+        SGPath cachePath = absolutePath() / ".dirhash";
+        sg_ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
+        for (const auto& e : hashes) {
+            const auto& entry = e.second;
+            stream << entry.filePath << "*" << entry.modTime << "*"
+                   << entry.lengthBytes << "*" << entry.hashHex << "\n";
+        }
+        stream.close();
+    }
+
 private:
 
     struct ChildWithName
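The hashForPath() method added above only trusts a cached hash while the file's size and modification time still match what was recorded; otherwise it recomputes. A standalone sketch of that staleness test, with std::filesystem standing in for SGPath and a stub computeHash() standing in for computeHashForPath() (both stand-ins are assumptions for illustration, not SimGear API):

#include <cstdint>
#include <filesystem>
#include <string>

namespace fs = std::filesystem;

struct CachedHash {
    std::uintmax_t lengthBytes;
    fs::file_time_type modTime;
    std::string hashHex;
};

// Stand-in for computeHashForPath(); the real code streams the file through SHA-1.
std::string computeHash(const fs::path&)
{
    return "0000000000000000000000000000000000000000";
}

std::string hashFor(const fs::path& p, CachedHash& cached)
{
    // Reuse the cached hash only while the file looks untouched on disk.
    if (fs::file_size(p) == cached.lengthBytes &&
        fs::last_write_time(p) == cached.modTime) {
        return cached.hashHex;
    }

    // Stale entry: recompute the hash and refresh the recorded metadata.
    cached.hashHex = computeHash(p);
    cached.lengthBytes = fs::file_size(p);
    cached.modTime = fs::last_write_time(p);
    return cached.hashHex;
}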
@@ -576,7 +664,7 @@ private:
             ok = _repository->deleteDirectory(fpath, path);
         } else {
             // remove the hash cache entry
-            _repository->updatedFileContents(path, std::string());
+            updatedFileContents(path, std::string());
             ok = path.remove();
         }
 
@@ -594,9 +682,80 @@ private:
         if (child.type == HTTPRepository::TarballType)
             p.concat(".tgz"); // For tarballs the hash is against the tarball file itself
-        return _repository->hashForPath(p);
+        return hashForPath(p);
     }
 
+    void parseHashCache()
+    {
+        hashes.clear();
+        SGPath cachePath = absolutePath() / ".dirhash";
+        if (!cachePath.exists()) {
+            return;
+        }
+
+        sg_ifstream stream(cachePath, std::ios::in);
+
+        while (!stream.eof()) {
+            std::string line;
+            std::getline(stream, line);
+            line = simgear::strutils::strip(line);
+            if (line.empty() || line[0] == '#')
+                continue;
+
+            string_list tokens = simgear::strutils::split(line, "*");
+            if (tokens.size() < 4) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+            const std::string nameData = simgear::strutils::strip(tokens[0]);
+            const std::string timeData = simgear::strutils::strip(tokens[1]);
+            const std::string sizeData = simgear::strutils::strip(tokens[2]);
+            const std::string hashData = simgear::strutils::strip(tokens[3]);
+
+            if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty()) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+
+            HashCacheEntry entry;
+            entry.filePath = nameData;
+            entry.hashHex = hashData;
+            entry.modTime = strtol(timeData.c_str(), NULL, 10);
+            entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
+            hashes.insert(std::make_pair(entry.filePath, entry));
+        }
+    }
+
+    void updatedFileContents(const SGPath& p, const std::string& newHash) const
+    {
+        // remove the existing entry
+        const auto ps = p.utf8Str();
+        auto it = hashes.find(ps);
+        if (it != hashes.end()) {
+            hashes.erase(it);
+            hashCacheDirty = true;
+        }
+
+        if (newHash.empty()) {
+            return; // we're done
+        }
+
+        // use a cloned SGPath and reset its caching to force one stat() call
+        SGPath p2(p);
+        p2.set_cached(false);
+        p2.set_cached(true);
+
+        HashCacheEntry entry;
+        entry.filePath = ps;
+        entry.hashHex = newHash;
+        entry.modTime = p2.modTime();
+        entry.lengthBytes = p2.sizeInBytes();
+        hashes.insert(std::make_pair(ps, entry));
+
+        hashCacheDirty = true;
+    }
+
+
     HTTPRepoPrivate* _repository;
     std::string _relativePath; // in URL and file-system space
 };
@@ -607,7 +766,6 @@ HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
     _d->http = cl;
    _d->basePath = base;
     _d->rootDir.reset(new HTTPDirectory(_d.get(), ""));
-    _d->parseHashCache();
 }
 
 HTTPRepository::~HTTPRepository()
@@ -678,7 +836,6 @@ void HTTPRepoPrivate::checkForComplete()
 {
     if (pendingUpdateOfChildren.empty() && activeRequests.empty() && queuedRequests.empty()) {
         isUpdating = false;
-        writeHashCache();
     }
 }
 
@@ -896,7 +1053,7 @@ HTTPRepository::failure() const
         return;
     }
 
-    std::string curHash = _directory->repository()->hashForPath(path());
+    std::string curHash = _directory->hashForPath(path());
     if (hash != curHash) {
         simgear::Dir d(_directory->absolutePath());
         if (!d.exists()) {
@@ -995,6 +1152,7 @@ HTTPRepository::failure() const
         http->cancelRequest(*rq, "Repository object deleted");
     }
 
+    flushHashCaches();
     directories.clear(); // will delete them all
 }
 
@@ -1014,147 +1172,6 @@ HTTPRepository::failure() const
     return r;
 }
 
-std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
-{
-    const auto ps = p.utf8Str();
-    auto it = hashes.find(ps);
-    if (it != hashes.end()) {
-        const auto& entry = it->second;
-        // ensure data on disk hasn't changed.
-        // we could also use the file type here if we were paranoid
-        if ((p.sizeInBytes() == entry.lengthBytes) && (p.modTime() == entry.modTime)) {
-            return entry.hashHex;
-        }
-
-        // entry in the cache, but it's stale so remove and fall through
-        hashes.erase(it);
-    }
-
-    std::string hash = computeHashForPath(p);
-    updatedFileContents(p, hash);
-    return hash;
-}
-
-std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
-{
-    if (!p.exists())
-        return {};
-
-    sha1nfo info;
-    sha1_init(&info);
-
-    const int bufSize = 1024 * 1024;
-    char* buf = static_cast<char*>(malloc(bufSize));
-    if (!buf) {
-        sg_io_exception("Couldn't allocate SHA1 computation buffer");
-    }
-
-    size_t readLen;
-    SGBinaryFile f(p);
-    if (!f.open(SG_IO_IN)) {
-        free(buf);
-        throw sg_io_exception("Couldn't open file for compute hash", p);
-    }
-    while ((readLen = f.read(buf, bufSize)) > 0) {
-        sha1_write(&info, buf, readLen);
-    }
-
-    f.close();
-    free(buf);
-    std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
-    return strutils::encodeHex(hashBytes);
-}
-
-void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
-{
-    // remove the existing entry
-    const auto ps = p.utf8Str();
-    auto it = hashes.find(ps);
-    if (it != hashes.end()) {
-        hashes.erase(it);
-        ++hashCacheDirty;
-    }
-
-    if (newHash.empty()) {
-        return; // we're done
-    }
-
-    // use a cloned SGPath and reset its caching to force one stat() call
-    SGPath p2(p);
-    p2.set_cached(false);
-    p2.set_cached(true);
-
-    HashCacheEntry entry;
-    entry.filePath = ps;
-    entry.hashHex = newHash;
-    entry.modTime = p2.modTime();
-    entry.lengthBytes = p2.sizeInBytes();
-    hashes.insert(std::make_pair(ps, entry));
-
-    ++hashCacheDirty;
-}
-
-void HTTPRepoPrivate::writeHashCache()
-{
-    if (hashCacheDirty == 0) {
-        return;
-    }
-
-    SGPath cachePath = basePath;
-    cachePath.append(".hashes");
-    sg_ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
-    for (const auto& e : hashes) {
-        const auto& entry = e.second;
-
-        stream << entry.filePath << "*" << entry.modTime << "*"
-               << entry.lengthBytes << "*" << entry.hashHex << "\n";
-    }
-    stream.close();
-    hashCacheDirty = 0;
-}
-
-void HTTPRepoPrivate::parseHashCache()
-{
-    hashes.clear();
-    SGPath cachePath = basePath;
-    cachePath.append(".hashes");
-    if (!cachePath.exists()) {
-        return;
-    }
-
-    sg_ifstream stream(cachePath, std::ios::in);
-
-    while (!stream.eof()) {
-        std::string line;
-        std::getline(stream, line);
-        line = simgear::strutils::strip(line);
-        if( line.empty() || line[0] == '#' )
-            continue;
-
-        string_list tokens = simgear::strutils::split(line, "*");
-        if( tokens.size() < 4 ) {
-            SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
-            continue;
-        }
-        const std::string nameData = simgear::strutils::strip(tokens[0]);
-        const std::string timeData = simgear::strutils::strip(tokens[1]);
-        const std::string sizeData = simgear::strutils::strip(tokens[2]);
-        const std::string hashData = simgear::strutils::strip(tokens[3]);
-
-        if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
-            SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
-            continue;
-        }
-
-        HashCacheEntry entry;
-        entry.filePath = nameData;
-        entry.hashHex = hashData;
-        entry.modTime = strtol(timeData.c_str(), NULL, 10);
-        entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
-        hashes.insert(std::make_pair(entry.filePath, entry));
-    }
-}
-
 class DirectoryWithPath
 {
 public:
@@ -1184,16 +1201,12 @@ HTTPRepository::failure() const
         if (it != directories.end()) {
             assert((*it)->absolutePath() == absPath);
             directories.erase(it);
-        } else {
-            // we encounter this code path when deleting an orphaned directory
-        }
+        } else {
+            // we encounter this code path when deleting an orphaned directory
+        }
 
-        Dir dir(absPath);
+        Dir dir(absPath);
         bool result = dir.remove(true);
 
         // update the hash cache too
         updatedFileContents(absPath, std::string());
 
         return result;
     }
 
@@ -1229,13 +1242,9 @@ HTTPRepository::failure() const
         http->makeRequest(rr);
     }
 
-    // rate limit how often we write this, since otherwise
-    // it dominates the time on Windows. 256 seems about right,
-    // causes a write a few times a minute.
-    if (hashCacheDirty > 256) {
-        writeHashCache();
+    if (countDirtyHashCaches() > 32) {
+        flushHashCaches();
     }
 
     checkForComplete();
 }
 
@@ -1307,4 +1316,24 @@ HTTPRepository::failure() const
     pendingUpdateOfChildren.push_back(dir);
 }
 
+int HTTPRepoPrivate::countDirtyHashCaches() const
+{
+    int result = rootDir->isHashCacheDirty() ? 1 : 0;
+    for (const auto& dir : directories) {
+        if (dir->isHashCacheDirty()) {
+            ++result;
+        }
+    }
+
+    return result;
+}
+
+void HTTPRepoPrivate::flushHashCaches()
+{
+    rootDir->writeHashCache();
+    for (const auto& dir : directories) {
+        dir->writeHashCache();
+    }
+}
+
 } // of namespace simgear
@@ -54,16 +54,7 @@ typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
 
 class HTTPRepoPrivate {
 public:
-    struct HashCacheEntry {
-        std::string filePath;
-        time_t modTime;
-        size_t lengthBytes;
-        std::string hashHex;
-    };
-
-    using HashCache = std::unordered_map<std::string, HashCacheEntry>;
-    HashCache hashes;
-    int hashCacheDirty = 0;
-
     HTTPRepository::FailureVec failures;
     int maxPermittedFailures = 16;
@@ -91,12 +82,6 @@ public:
     HTTP::Request_ptr updateDir(HTTPDirectory *dir, const std::string &hash,
                                 size_t sz);
 
-    std::string hashForPath(const SGPath &p);
-    void updatedFileContents(const SGPath &p, const std::string &newHash);
-    void parseHashCache();
-    std::string computeHashForPath(const SGPath &p);
-    void writeHashCache();
-
     void failedToGetRootIndex(HTTPRepository::ResultCode st);
     void failedToUpdateChild(const SGPath &relativePath,
                              HTTPRepository::ResultCode fileStatus);
@@ -128,6 +113,9 @@ public:
     std::deque<HTTPDirectory*> pendingUpdateOfChildren;
 
     SGPath installedCopyPath;
 
+    int countDirtyHashCaches() const;
+    void flushHashCaches();
 };
 
 } // namespace simgear
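For reference, a minimal standalone sketch of the .dirhash round-trip described by this diff: writeHashCache() emits one '*'-separated line per entry and parseHashCache() reads it back. Plain std::fstream replaces SimGear's sg_ofstream/sg_ifstream, and the demo path and values in main() are made up for illustration:

// Illustrative sketch only: mirrors the .dirhash read/write logic from the
// diff, using plain standard C++ in place of SimGear's SGPath and stream types.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

struct HashCacheEntry {
    std::string filePath;
    std::int64_t modTime;    // time_t in the real code
    std::size_t lengthBytes;
    std::string hashHex;
};

using HashCache = std::unordered_map<std::string, HashCacheEntry>;

// Write one "filePath*modTime*lengthBytes*hashHex" line per entry,
// matching HTTPDirectory::writeHashCache() above.
void writeHashCache(const HashCache& hashes, const std::string& cachePath)
{
    std::ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
    for (const auto& e : hashes) {
        const auto& entry = e.second;
        stream << entry.filePath << "*" << entry.modTime << "*"
               << entry.lengthBytes << "*" << entry.hashHex << "\n";
    }
}

// Parse the same format back, skipping blank, comment, and malformed lines,
// matching HTTPDirectory::parseHashCache() above (which logs a SG_WARN instead).
HashCache parseHashCache(const std::string& cachePath)
{
    HashCache hashes;
    std::ifstream stream(cachePath);
    std::string line;
    while (std::getline(stream, line)) {
        if (line.empty() || line[0] == '#')
            continue;
        std::istringstream ls(line);
        HashCacheEntry entry;
        std::string timeData, sizeData;
        if (!std::getline(ls, entry.filePath, '*') ||
            !std::getline(ls, timeData, '*') ||
            !std::getline(ls, sizeData, '*') ||
            !std::getline(ls, entry.hashHex)) {
            continue; // malformed entry: ignore the line
        }
        entry.modTime = std::stoll(timeData);
        entry.lengthBytes = static_cast<std::size_t>(std::stoull(sizeData));
        hashes.insert(std::make_pair(entry.filePath, entry));
    }
    return hashes;
}

int main()
{
    HashCache hashes;
    hashes["/tmp/demo.stg"] = {"/tmp/demo.stg", 1584382112, 20680,
                               "66a53c4e2f8f2951d4f0f30bd2f2c3b98753e6b1"};
    writeHashCache(hashes, ".dirhash");
    const HashCache roundTrip = parseHashCache(".dirhash");
    std::cout << roundTrip.size() << " entry(ies) parsed\n"; // prints: 1 entry(ies) parsed
    return 0;
}

Note that because '*' is the field separator, the format assumes file paths contain no '*' characters.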