TerraSync: fast start Airports/ sync

If TerraSync/Airports is missing, download Airports_archive.tgz instead.
In all other cases update as normal. This has the effect that fresh
installs make far fewer requests to TerraSync servers.
This commit is contained in:
Automatic Release Builder 2020-10-05 11:58:03 +01:00 committed by James Turner
parent ed357c5c8f
commit a5dd5cdc80
3 changed files with 178 additions and 87 deletions

View File

@ -140,6 +140,7 @@ public:
HTTPRepository::ResultCode status; HTTPRepository::ResultCode status;
HTTPDirectory_ptr rootDir; HTTPDirectory_ptr rootDir;
size_t totalDownloaded; size_t totalDownloaded;
HTTPRepository::SyncPredicate syncPredicate;
HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name, HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
size_t sz); size_t sz);
@ -176,25 +177,14 @@ class HTTPDirectory
{ {
struct ChildInfo struct ChildInfo
{ {
enum Type ChildInfo(HTTPRepository::EntryType ty, const std::string &nameData,
{ const std::string &hashData)
FileType, : type(ty), name(nameData), hash(hashData) {}
DirectoryType,
TarballType
};
ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
type(ty),
name(nameData),
hash(hashData)
{
}
ChildInfo(const ChildInfo& other) = default; ChildInfo(const ChildInfo& other) = default;
ChildInfo& operator=(const ChildInfo& other) = default; ChildInfo& operator=(const ChildInfo& other) = default;
void setSize(const std::string & sizeData) void setSize(const std::string &sizeData) {
{
sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10); sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
} }
@ -203,7 +193,7 @@ class HTTPDirectory
return name < other.name; return name < other.name;
} }
Type type; HTTPRepository::EntryType type;
std::string name, hash; std::string name, hash;
size_t sizeInBytes = 0; size_t sizeInBytes = 0;
SGPath path; // absolute path on disk SGPath path; // absolute path on disk
@ -239,7 +229,7 @@ public:
std::string url() const std::string url() const
{ {
if (_relativePath.empty()) { if (_relativePath.empty()) { // root directory of the repo
return _repository->baseUrl; return _repository->baseUrl;
} }
@ -277,7 +267,7 @@ public:
size_t bufSize = 0; size_t bufSize = 0;
for (const auto& child : children) { for (const auto& child : children) {
if (child.type != ChildInfo::FileType) if (child.type != HTTPRepository::FileType)
continue; continue;
if (child.path.exists()) if (child.path.exists())
@ -316,6 +306,8 @@ public:
void updateChildrenBasedOnHash() void updateChildrenBasedOnHash()
{ {
using SAct = HTTPRepository::SyncAction;
copyInstalledChildren(); copyInstalledChildren();
ChildInfoList toBeUpdated; ChildInfoList toBeUpdated;
@ -324,27 +316,77 @@ public:
PathList fsChildren = d.children(0); PathList fsChildren = d.children(0);
PathList orphans = d.children(0); PathList orphans = d.children(0);
ChildInfoList::const_iterator it; for (const auto &c : children) {
for (it=children.begin(); it != children.end(); ++it) {
// Check if the file exists // Check if the file exists
PathList::const_iterator p = std::find_if(fsChildren.begin(), fsChildren.end(), LocalFileMatcher(*it)); auto p = std::find_if(fsChildren.begin(), fsChildren.end(),
if (p == fsChildren.end()) { LocalFileMatcher(c));
const bool isNew = (p == fsChildren.end());
const bool upToDate = hashForChild(c) == c.hash;
if (!isNew) {
orphans.erase(std::remove(orphans.begin(), orphans.end(), *p),
orphans.end());
}
if (_repository->syncPredicate) {
const auto pathOnDisk = isNew ? absolutePath() / c.name : *p;
// never handle deletes here, do them at the end
const auto action =
isNew ? SAct::Add : (upToDate ? SAct::UpToDate : SAct::Update);
const HTTPRepository::SyncItem item = {relativePath(), c.type, c.name,
action, pathOnDisk};
const bool doSync = _repository->syncPredicate(item);
if (!doSync) {
continue; // skip it, predicate filtered it out
}
}
if (isNew) {
// File or directory does not exist on local disk, so needs to be updated. // File or directory does not exist on local disk, so needs to be updated.
toBeUpdated.push_back(ChildInfo(*it)); toBeUpdated.push_back(c);
} else if (hashForChild(*it) != it->hash) { } else if (!upToDate) {
// File/directory exists, but hash doesn't match. // File/directory exists, but hash doesn't match.
toBeUpdated.push_back(ChildInfo(*it)); toBeUpdated.push_back(c);
orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end());
} else { } else {
// File/Directory exists and hash is valid. // File/Directory exists and hash is valid.
orphans.erase(std::remove(orphans.begin(), orphans.end(), *p), orphans.end());
if (it->type == ChildInfo::DirectoryType) { if (c.type == HTTPRepository::DirectoryType) {
// If it's a directory,perform a recursive check. // If it's a directory,perform a recursive check.
HTTPDirectory* childDir = childDirectory(it->name); HTTPDirectory *childDir = childDirectory(c.name);
childDir->updateChildrenBasedOnHash(); childDir->updateChildrenBasedOnHash();
} }
} }
} // of repository-defined (well, .dirIndex) children iteration
// allow the filtering of orphans; this is important so that a filter
// can be used to preserve non-repo files in a directory,
// i.e somewhat like a .gitignore
if (!orphans.empty() && _repository->syncPredicate) {
const auto ourPath = relativePath();
const auto pred = _repository->syncPredicate;
auto l = [ourPath, pred](const SGPath &o) {
// this doesn't special-case for tarballs (they will be reported as a
// file) I think that's okay, since a filter can see the full path
const auto type = o.isDir() ? HTTPRepository::DirectoryType
: HTTPRepository::FileType;
const HTTPRepository::SyncItem item = {ourPath, type, o.file(),
SAct::Delete, o};
const bool r = pred(item);
// clarification: the predicate returns true if the file should be
// handled as normal, false if it should be skipped. But since we're
// inside a remove_if, we want to remove *skipped* files from orphans,
// so they don't get deleted. So we want to return true here, if the
// file should be skipped.
return (r == false);
};
auto it = std::remove_if(orphans.begin(), orphans.end(), l);
orphans.erase(it, orphans.end());
} }
// We now have a list of entries that need to be updated, and a list // We now have a list of entries that need to be updated, and a list
@ -384,16 +426,17 @@ public:
{ {
ChildInfoList::const_iterator it; ChildInfoList::const_iterator it;
for (it = names.begin(); it != names.end(); ++it) { for (it = names.begin(); it != names.end(); ++it) {
if (it->type == ChildInfo::FileType) { if (it->type == HTTPRepository::FileType) {
_repository->updateFile(this, it->name, it->sizeInBytes); _repository->updateFile(this, it->name, it->sizeInBytes);
} else if (it->type == ChildInfo::DirectoryType){ } else if (it->type == HTTPRepository::DirectoryType) {
HTTPDirectory *childDir = childDirectory(it->name); HTTPDirectory *childDir = childDirectory(it->name);
_repository->updateDir(childDir, it->hash, it->sizeInBytes); _repository->updateDir(childDir, it->hash, it->sizeInBytes);
} else if (it->type == ChildInfo::TarballType) { } else if (it->type == HTTPRepository::TarballType) {
// Download a tarball just as a file. // Download a tarball just as a file.
_repository->updateFile(this, it->name, it->sizeInBytes); _repository->updateFile(this, it->name, it->sizeInBytes);
} else { } else {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Coding error! Unknown Child type to schedule update"); SG_LOG(SG_TERRASYNC, SG_ALERT,
"Coding error! Unknown Child type to schedule update");
} }
} }
} }
@ -451,19 +494,23 @@ public:
SGPath extractDir = p.dir(); SGPath extractDir = p.dir();
ArchiveExtractor ex(extractDir); ArchiveExtractor ex(extractDir);
uint8_t* buf = (uint8_t*) alloca(128); uint8_t *buf = (uint8_t *)alloca(2048);
while (!f.eof()) { while (!f.eof()) {
size_t bufSize = f.read((char*) buf, 128); size_t bufSize = f.read((char *)buf, 2048);
ex.extractBytes(buf, bufSize); ex.extractBytes(buf, bufSize);
} }
ex.flush(); ex.flush();
if (! ex.isAtEndOfArchive()) { if (! ex.isAtEndOfArchive()) {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Corrupt tarball " << p); SG_LOG(SG_TERRASYNC, SG_ALERT, "Corrupt tarball " << p);
_repository->failedToUpdateChild(
_relativePath, HTTPRepository::REPO_ERROR_IO);
} }
if (ex.hasError()) { if (ex.hasError()) {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Error extracting " << p); SG_LOG(SG_TERRASYNC, SG_ALERT, "Error extracting " << p);
_repository->failedToUpdateChild(
_relativePath, HTTPRepository::REPO_ERROR_IO);
} }
} else { } else {
@ -577,9 +624,12 @@ private:
continue; continue;
} }
ChildInfo ci = ChildInfo(ChildInfo::FileType, tokens[1], tokens[2]); ChildInfo ci =
if (typeData == "d") ci.type = ChildInfo::DirectoryType; ChildInfo(HTTPRepository::FileType, tokens[1], tokens[2]);
if (typeData == "t") ci.type = ChildInfo::TarballType; if (typeData == "d")
ci.type = HTTPRepository::DirectoryType;
if (typeData == "t")
ci.type = HTTPRepository::TarballType;
children.emplace_back(ci); children.emplace_back(ci);
children.back().path = absolutePath() / tokens[1]; children.back().path = absolutePath() / tokens[1];
@ -614,8 +664,11 @@ private:
std::string hashForChild(const ChildInfo& child) const std::string hashForChild(const ChildInfo& child) const
{ {
SGPath p(child.path); SGPath p(child.path);
if (child.type == ChildInfo::DirectoryType) p.append(".dirindex"); if (child.type == HTTPRepository::DirectoryType)
if (child.type == ChildInfo::TarballType) p.concat(".tgz"); // For tarballs the hash is against the tarball file itself p.append(".dirindex");
if (child.type == HTTPRepository::TarballType)
p.concat(
".tgz"); // For tarballs the hash is against the tarball file itself
return _repository->hashForPath(p); return _repository->hashForPath(p);
} }
@ -720,6 +773,8 @@ std::string HTTPRepository::resultCodeAsString(ResultCode code)
return innerResultCodeAsString(code); return innerResultCodeAsString(code);
} }
void HTTPRepository::setFilter(SyncPredicate sp) { _d->syncPredicate = sp; }
HTTPRepository::ResultCode HTTPRepository::ResultCode
HTTPRepository::failure() const HTTPRepository::failure() const
{ {
@ -848,7 +903,13 @@ HTTPRepository::failure() const
if (responseCode() == 200) { if (responseCode() == 200) {
std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH); std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
if (!_targetHash.empty() && (hash != _targetHash)) { if (!_targetHash.empty() && (hash != _targetHash)) {
_directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM); SG_LOG(SG_TERRASYNC, SG_WARN,
"Checksum error getting dirIndex for:"
<< _directory->relativePath() << "; expected "
<< _targetHash << " but received " << hash);
_directory->failedToUpdate(
HTTPRepository::REPO_ERROR_CHECKSUM);
_directory->repository()->finishedRequest(this); _directory->repository()->finishedRequest(this);
return; return;
} }

View File

@ -20,6 +20,7 @@
#ifndef SG_IO_HTTP_REPOSITORY_HXX #ifndef SG_IO_HTTP_REPOSITORY_HXX
#define SG_IO_HTTP_REPOSITORY_HXX #define SG_IO_HTTP_REPOSITORY_HXX
#include <functional>
#include <memory> #include <memory>
#include <simgear/misc/sg_path.hxx> #include <simgear/misc/sg_path.hxx>
@ -74,6 +75,22 @@ public:
static std::string resultCodeAsString(ResultCode code); static std::string resultCodeAsString(ResultCode code);
// How a repository entry relates to the local copy when it is offered to
// the sync predicate (UpToDate entries require no transfer).
enum class SyncAction { Add, Update, Delete, UpToDate };
// Kind of entry in the repository listing. NOTE(review): orphaned local
// tarballs are reported as FileType, not TarballType — a predicate should
// rely on the path/filename rather than the type for archives.
enum EntryType { FileType, DirectoryType, TarballType };
// One candidate sync operation, as presented to a SyncPredicate.
struct SyncItem {
const std::string directory; // relative path in the repository
const EntryType type;
const std::string filename;
const SyncAction action;
const SGPath pathOnDisk; // path the entry does / will have
};
// Filter callback: return true to process the item normally, false to skip
// it (skipped Delete items are preserved on disk, like a .gitignore).
using SyncPredicate = std::function<bool(const SyncItem &item)>;
void setFilter(SyncPredicate sp);
private: private:
bool isBare() const; bool isBare() const;

View File

@ -597,8 +597,32 @@ void SGTerraSync::WorkerThread::updateSyncSlot(SyncSlot &slot)
} }
} // of creating directory step } // of creating directory step
// optimise initial Airport download
if (slot.isNewDirectory &&
(slot.currentItem._type == SyncItem::AirportData)) {
SG_LOG(SG_TERRASYNC, SG_INFO, "doing Airports download via tarball");
// we want to sync the 'root' TerraSync dir, but not all of it, just
// the Airports_archive.tar.gz file so we use our TerraSync local root
// as the path (since the archive will add Airports/)
slot.repository.reset(new HTTPRepository(_local_dir, &_http));
slot.repository->setBaseUrl(_httpServer + "/");
// filter callback to *only* sync the Airport_archive tarball,
// and ensure no other contents are touched
auto f = [](const HTTPRepository::SyncItem &item) {
if (!item.directory.empty())
return false;
return (item.filename.find("Airports_archive.") == 0);
};
slot.repository->setFilter(f);
} else {
slot.repository.reset(new HTTPRepository(path, &_http)); slot.repository.reset(new HTTPRepository(path, &_http));
slot.repository->setBaseUrl(_httpServer + "/" + slot.currentItem._dir); slot.repository->setBaseUrl(_httpServer + "/" +
slot.currentItem._dir);
}
if (_installRoot.exists()) { if (_installRoot.exists()) {
SGPath p = _installRoot; SGPath p = _installRoot;
@ -1089,21 +1113,10 @@ bool SGTerraSync::isIdle() {return _workerThread->isIdle();}
void SGTerraSync::syncAirportsModels() void SGTerraSync::syncAirportsModels()
{ {
static const char* bounds = "MZAJKL"; // airport sync order: K-L, A-J, M-Z SyncItem w("Airports", SyncItem::AirportData);
// note "request" method uses LIFO order, i.e. processes most recent request first SyncItem a("Models", SyncItem::SharedModels);
for( unsigned i = 0; i < strlen(bounds)/2; i++ )
{
for ( char synced_other = bounds[2*i]; synced_other <= bounds[2*i+1]; synced_other++ )
{
ostringstream dir;
dir << "Airports/" << synced_other;
SyncItem w(dir.str(), SyncItem::AirportData);
_workerThread->request( w );
}
}
SyncItem w("Models", SyncItem::SharedModels);
_workerThread->request(w); _workerThread->request(w);
_workerThread->request(a);
} }
string_list SGTerraSync::getSceneryPathSuffixes() const string_list SGTerraSync::getSceneryPathSuffixes() const