diff --git a/simgear/io/HTTPRepository.cxx b/simgear/io/HTTPRepository.cxx
index 0a07db43..1c7409c3 100644
--- a/simgear/io/HTTPRepository.cxx
+++ b/simgear/io/HTTPRepository.cxx
@@ -81,6 +81,45 @@ std::string innerResultCodeAsString(HTTPRepository::ResultCode code) {
     return "Unknown response code";
 }
 
+struct HashCacheEntry {
+    std::string filePath;
+    time_t modTime;
+    size_t lengthBytes;
+    std::string hashHex;
+};
+
+using HashCache = std::unordered_map<std::string, HashCacheEntry>;
+
+std::string computeHashForPath(const SGPath& p)
+{
+    if (!p.exists())
+        return {};
+
+    sha1nfo info;
+    sha1_init(&info);
+
+    const int bufSize = 1024 * 1024;
+    char* buf = static_cast<char*>(malloc(bufSize));
+    if (!buf) {
+        sg_io_exception("Couldn't allocate SHA1 computation buffer");
+    }
+
+    size_t readLen;
+    SGBinaryFile f(p);
+    if (!f.open(SG_IO_IN)) {
+        free(buf);
+        throw sg_io_exception("Couldn't open file for compute hash", p);
+    }
+    while ((readLen = f.read(buf, bufSize)) > 0) {
+        sha1_write(&info, buf, readLen);
+    }
+
+    f.close();
+    free(buf);
+    std::string hashBytes((char*)sha1_result(&info), HASH_LENGTH);
+    return strutils::encodeHex(hashBytes);
+}
+
 } // namespace
 
 class HTTPDirectory
@@ -112,6 +151,9 @@ class HTTPDirectory
     typedef std::vector<ChildInfo> ChildInfoList;
     ChildInfoList children;
 
+    mutable HashCache hashes;
+    mutable bool hashCacheDirty = false;
+
 public:
     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
         _repository(repo),
@@ -125,6 +167,8 @@ public:
             // already exists on disk
             parseDirIndex(children);
             std::sort(children.begin(), children.end());
+
+            parseHashCache();
         } catch (sg_exception& ) {
             // parsing cache failed
             children.clear();
@@ -150,7 +194,7 @@ public:
     {
         SGPath fpath(absolutePath());
         fpath.append(".dirindex");
-        _repository->updatedFileContents(fpath, hash);
+        updatedFileContents(fpath, hash);
 
         children.clear();
         parseDirIndex(children);
@@ -179,20 +223,20 @@ public:
        size_t bufSize = 0;
 
        for (const auto& child : children) {
-          if (child.type != HTTPRepository::FileType)
-            continue;
+            if (child.type != HTTPRepository::FileType)
+                continue;
 
-          if (child.path.exists())
-            continue;
+            if (child.path.exists())
+                continue;
 
-          SGPath cp = _repository->installedCopyPath;
-          cp.append(relativePath());
-          cp.append(child.name);
-          if (!cp.exists()) {
-            continue;
-          }
+            SGPath cp = _repository->installedCopyPath;
+            cp.append(relativePath());
+            cp.append(child.name);
+            if (!cp.exists()) {
+                continue;
+            }
 
-          SGBinaryFile src(cp);
+            SGBinaryFile src(cp);
             SGBinaryFile dst(child.path);
             src.open(SG_IO_IN);
             dst.open(SG_IO_OUT);
@@ -388,7 +432,7 @@ public:
                     _relativePath + "/" + file,
                     HTTPRepository::REPO_ERROR_CHECKSUM);
             } else {
-                _repository->updatedFileContents(it->path, hash);
+                updatedFileContents(it->path, hash);
                 _repository->updatedChildSuccessfully(_relativePath + "/" +
                                                       file);
 
@@ -453,6 +497,50 @@ public:
         fpath.append(file);
         _repository->failedToUpdateChild(fpath, status);
     }
+
+    std::string hashForPath(const SGPath& p) const
+    {
+        const auto ps = p.utf8Str();
+        auto it = hashes.find(ps);
+        if (it != hashes.end()) {
+            const auto& entry = it->second;
+            // ensure data on disk hasn't changed.
+            // we could also use the file type here if we were paranoid
+            if ((p.sizeInBytes() == entry.lengthBytes) && (p.modTime() == entry.modTime)) {
+                return entry.hashHex;
+            }
+
+            // entry in the cache, but it's stale so remove and fall through
+            hashes.erase(it);
+        }
+
+        std::string hash = computeHashForPath(p);
+        updatedFileContents(p, hash);
+        return hash;
+    }
+
+    bool isHashCacheDirty() const
+    {
+        return hashCacheDirty;
+    }
+
+    void writeHashCache() const
+    {
+        if (!hashCacheDirty)
+            return;
+
+        hashCacheDirty = false;
+
+        SGPath cachePath = absolutePath() / ".dirhash";
+        sg_ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
+        for (const auto& e : hashes) {
+            const auto& entry = e.second;
+            stream << entry.filePath << "*" << entry.modTime << "*"
+                   << entry.lengthBytes << "*" << entry.hashHex << "\n";
+        }
+        stream.close();
+    }
+
 private:
 
     struct ChildWithName
@@ -576,7 +664,7 @@ private:
             ok = _repository->deleteDirectory(fpath, path);
         } else {
             // remove the hash cache entry
-            _repository->updatedFileContents(path, std::string());
+            updatedFileContents(path, std::string());
             ok = path.remove();
         }
 
@@ -594,9 +682,80 @@ private:
 
         if (child.type == HTTPRepository::TarballType)
             p.concat( ".tgz"); // For tarballs the hash is against the tarball file itself
-        return _repository->hashForPath(p);
+        return hashForPath(p);
     }
 
+    void parseHashCache()
+    {
+        hashes.clear();
+        SGPath cachePath = absolutePath() / ".dirhash";
+        if (!cachePath.exists()) {
+            return;
+        }
+
+        sg_ifstream stream(cachePath, std::ios::in);
+
+        while (!stream.eof()) {
+            std::string line;
+            std::getline(stream, line);
+            line = simgear::strutils::strip(line);
+            if (line.empty() || line[0] == '#')
+                continue;
+
+            string_list tokens = simgear::strutils::split(line, "*");
+            if (tokens.size() < 4) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+            const std::string nameData = simgear::strutils::strip(tokens[0]);
+            const std::string timeData = simgear::strutils::strip(tokens[1]);
+            const std::string sizeData = simgear::strutils::strip(tokens[2]);
+            const std::string hashData = simgear::strutils::strip(tokens[3]);
+
+            if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty()) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+
+            HashCacheEntry entry;
+            entry.filePath = nameData;
+            entry.hashHex = hashData;
+            entry.modTime = strtol(timeData.c_str(), NULL, 10);
+            entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
+            hashes.insert(std::make_pair(entry.filePath, entry));
+        }
+    }
+
+    void updatedFileContents(const SGPath& p, const std::string& newHash) const
+    {
+        // remove the existing entry
+        const auto ps = p.utf8Str();
+        auto it = hashes.find(ps);
+        if (it != hashes.end()) {
+            hashes.erase(it);
+            hashCacheDirty = true;
+        }
+
+        if (newHash.empty()) {
+            return; // we're done
+        }
+
+        // use a cloned SGPath and reset its caching to force one stat() call
+        SGPath p2(p);
+        p2.set_cached(false);
+        p2.set_cached(true);
+
+        HashCacheEntry entry;
+        entry.filePath = ps;
+        entry.hashHex = newHash;
+        entry.modTime = p2.modTime();
+        entry.lengthBytes = p2.sizeInBytes();
+        hashes.insert(std::make_pair(ps, entry));
+
+        hashCacheDirty = true;
+    }
+
+
     HTTPRepoPrivate* _repository;
     std::string _relativePath; // in URL and file-system space
 };
@@ -607,7 +766,6 @@ HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
     _d->http = cl;
     _d->basePath = base;
     _d->rootDir.reset(new HTTPDirectory(_d.get(), ""));
-    _d->parseHashCache();
 }
 
 HTTPRepository::~HTTPRepository()
@@ -678,7 +836,6 @@ void HTTPRepoPrivate::checkForComplete() {
   if (pendingUpdateOfChildren.empty() && activeRequests.empty() &&
       queuedRequests.empty()) {
     isUpdating = false;
-    writeHashCache();
   }
 }
 
@@ -896,7 +1053,7 @@ HTTPRepository::failure() const
             return;
         }
 
-        std::string curHash = _directory->repository()->hashForPath(path());
+        std::string curHash = _directory->hashForPath(path());
         if (hash != curHash) {
             simgear::Dir d(_directory->absolutePath());
             if (!d.exists()) {
@@ -995,6 +1152,7 @@ HTTPRepository::failure() const
             http->cancelRequest(*rq, "Repository object deleted");
         }
 
+        flushHashCaches();
         directories.clear(); // wil delete them all
     }
 
@@ -1014,147 +1172,6 @@ HTTPRepository::failure() const
         return r;
     }
 
-    std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
-    {
-        const auto ps = p.utf8Str();
-        auto it = hashes.find(ps);
-        if (it != hashes.end()) {
-            const auto& entry = it->second;
-            // ensure data on disk hasn't changed.
-            // we could also use the file type here if we were paranoid
-            if ((p.sizeInBytes() == entry.lengthBytes) && (p.modTime() == entry.modTime)) {
-                return entry.hashHex;
-            }
-
-            // entry in the cache, but it's stale so remove and fall through
-            hashes.erase(it);
-        }
-
-        std::string hash = computeHashForPath(p);
-        updatedFileContents(p, hash);
-        return hash;
-    }
-
-    std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
-    {
-        if (!p.exists())
-            return {};
-
-        sha1nfo info;
-        sha1_init(&info);
-
-        const int bufSize = 1024 * 1024;
-        char* buf = static_cast<char*>(malloc(bufSize));
-        if (!buf) {
-            sg_io_exception("Couldn't allocate SHA1 computation buffer");
-        }
-
-        size_t readLen;
-        SGBinaryFile f(p);
-        if (!f.open(SG_IO_IN)) {
-            free(buf);
-            throw sg_io_exception("Couldn't open file for compute hash", p);
-        }
-        while ((readLen = f.read(buf, bufSize)) > 0) {
-            sha1_write(&info, buf, readLen);
-        }
-
-        f.close();
-        free(buf);
-        std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
-        return strutils::encodeHex(hashBytes);
-    }
-
-    void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
-    {
-        // remove the existing entry
-        const auto ps = p.utf8Str();
-        auto it = hashes.find(ps);
-        if (it != hashes.end()) {
-            hashes.erase(it);
-            ++hashCacheDirty;
-        }
-
-        if (newHash.empty()) {
-            return; // we're done
-        }
-
-        // use a cloned SGPath and reset its caching to force one stat() call
-        SGPath p2(p);
-        p2.set_cached(false);
-        p2.set_cached(true);
-
-        HashCacheEntry entry;
-        entry.filePath = ps;
-        entry.hashHex = newHash;
-        entry.modTime = p2.modTime();
-        entry.lengthBytes = p2.sizeInBytes();
-        hashes.insert(std::make_pair(ps, entry));
-
-        ++hashCacheDirty ;
-    }
-
-    void HTTPRepoPrivate::writeHashCache()
-    {
-        if (hashCacheDirty == 0) {
-            return;
-        }
-
-        SGPath cachePath = basePath;
-        cachePath.append(".hashes");
-        sg_ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
-        for (const auto& e : hashes) {
-            const auto& entry = e.second;
-
-            stream << entry.filePath << "*" << entry.modTime << "*"
-                   << entry.lengthBytes << "*" << entry.hashHex << "\n";
-        }
-        stream.close();
-        hashCacheDirty = 0;
-    }
-
-    void HTTPRepoPrivate::parseHashCache()
-    {
-        hashes.clear();
-        SGPath cachePath = basePath;
-        cachePath.append(".hashes");
-        if (!cachePath.exists()) {
-            return;
-        }
-
-        sg_ifstream stream(cachePath, std::ios::in);
-
-        while (!stream.eof()) {
-            std::string line;
-            std::getline(stream,line);
-            line = simgear::strutils::strip(line);
-            if( line.empty() || line[0] == '#' )
-                continue;
-
-            string_list tokens = simgear::strutils::split(line, "*");
-            if( tokens.size() < 4 ) {
-                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
-                continue;
-            }
-            const std::string nameData = simgear::strutils::strip(tokens[0]);
-            const std::string timeData = simgear::strutils::strip(tokens[1]);
-            const std::string sizeData = simgear::strutils::strip(tokens[2]);
-            const std::string hashData = simgear::strutils::strip(tokens[3]);
-
-            if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
-                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
-                continue;
-            }
-
-            HashCacheEntry entry;
-            entry.filePath = nameData;
-            entry.hashHex = hashData;
-            entry.modTime = strtol(timeData.c_str(), NULL, 10);
-            entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
-            hashes.insert(std::make_pair(entry.filePath, entry));
-        }
-    }
-
     class DirectoryWithPath
     {
     public:
@@ -1184,16 +1201,12 @@ HTTPRepository::failure() const
         if (it != directories.end()) {
             assert((*it)->absolutePath() == absPath);
             directories.erase(it);
-      } else {
-          // we encounter this code path when deleting an orphaned directory
-      }
+        } else {
+            // we encounter this code path when deleting an orphaned directory
+        }
 
-      Dir dir(absPath);
+        Dir dir(absPath);
         bool result = dir.remove(true);
-
-        // update the hash cache too
-        updatedFileContents(absPath, std::string());
-
         return result;
     }
 
@@ -1229,13 +1242,9 @@ HTTPRepository::failure() const
             http->makeRequest(rr);
         }
 
-        // rate limit how often we write this, since otherwise
-        // it dominates the time on Windows. 256 seems about right,
-        // causes a write a few times a minute.
-        if (hashCacheDirty > 256) {
-            writeHashCache();
+        if (countDirtyHashCaches() > 32) {
+            flushHashCaches();
         }
-
        checkForComplete();
     }
 
@@ -1307,4 +1316,24 @@ HTTPRepository::failure() const
         pendingUpdateOfChildren.push_back(dir);
     }
 
+    int HTTPRepoPrivate::countDirtyHashCaches() const
+    {
+        int result = rootDir->isHashCacheDirty() ? 1 : 0;
+        for (const auto& dir : directories) {
+            if (dir->isHashCacheDirty()) {
+                ++result;
+            }
+        }
+
+        return result;
+    }
+
+    void HTTPRepoPrivate::flushHashCaches()
+    {
+        rootDir->writeHashCache();
+        for (const auto& dir : directories) {
+            dir->writeHashCache();
+        }
+    }
+
 } // of namespace simgear
diff --git a/simgear/io/HTTPRepository_private.hxx b/simgear/io/HTTPRepository_private.hxx
index 70d2d97e..5c325831 100644
--- a/simgear/io/HTTPRepository_private.hxx
+++ b/simgear/io/HTTPRepository_private.hxx
@@ -54,16 +54,7 @@ typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
 
 class HTTPRepoPrivate
 {
 public:
-  struct HashCacheEntry {
-    std::string filePath;
-    time_t modTime;
-    size_t lengthBytes;
-    std::string hashHex;
-  };
-  using HashCache = std::unordered_map<std::string, HashCacheEntry>;
-  HashCache hashes;
-  int hashCacheDirty = 0;
 
   HTTPRepository::FailureVec failures;
   int maxPermittedFailures = 16;
@@ -91,12 +82,6 @@ public:
   HTTP::Request_ptr updateDir(HTTPDirectory *dir, const std::string &hash,
                               size_t sz);
 
-  std::string hashForPath(const SGPath &p);
-  void updatedFileContents(const SGPath &p, const std::string &newHash);
-  void parseHashCache();
-  std::string computeHashForPath(const SGPath &p);
-  void writeHashCache();
-
   void failedToGetRootIndex(HTTPRepository::ResultCode st);
   void failedToUpdateChild(const SGPath &relativePath,
                            HTTPRepository::ResultCode fileStatus);
@@ -128,6 +113,9 @@ public:
   std::deque<HTTPDirectory*> pendingUpdateOfChildren;
 
   SGPath installedCopyPath;
+
+  int countDirtyHashCaches() const;
+  void flushHashCaches();
 };
 
 } // namespace simgear