HTTPRepository: improve handling of archives

Avoid hard-coding the archive extension, and ensure the extracted
archive directory is not orphaned on update. Finally, use the
literal filename in the .dirindex when computing the hash, rather
than adding a .tgz extension.
This commit is contained in:
James Turner 2021-01-08 19:39:14 +00:00
parent 161f76508d
commit 9fbf56004b

View File

@ -266,6 +266,19 @@ public:
free(buf); free(buf);
} }
/// Helper to check for and erase the extracted directory 'fooBar' from @a paths
/// when passed an archive name such as fooBar.zip, fooBar.tgz, etc.
///
/// @param paths        list of paths to search; at most one entry (the first
///                     match found) is erased, the rest are left untouched.
/// @param tarballName  archive file name; the directory name looked for is
///                     SGPath::file_base() of this name.
void removeExtractedDirectoryFromList(PathList& paths, const std::string& tarballName)
{
    const auto directoryName = SGPath::fromUtf8(tarballName).file_base();

    // Compare the names first: that is a plain string comparison, so isDir()
    // (presumably a filesystem query -- confirm) is only consulted for an
    // entry whose name already matches. The lambda is invoked synchronously
    // by find_if, so capturing directoryName by reference is safe.
    const auto it = std::find_if(paths.begin(), paths.end(), [&directoryName](const SGPath& p) {
        return (p.file() == directoryName) && p.isDir();
    });

    if (it != paths.end()) {
        paths.erase(it);
    }
}
void updateChildrenBasedOnHash() void updateChildrenBasedOnHash()
{ {
using SAct = HTTPRepository::SyncAction; using SAct = HTTPRepository::SyncAction;
@ -299,6 +312,11 @@ public:
orphans.end()); orphans.end());
} }
// ensure the extracted directory corresponding to a tarball is *not* considered an orphan
if (c.type == HTTPRepository::TarballType) {
removeExtractedDirectoryFromList(orphans, c.name);
}
if (_repository->syncPredicate) { if (_repository->syncPredicate) {
const auto pathOnDisk = isNew ? absolutePath() / c.name : *p; const auto pathOnDisk = isNew ? absolutePath() / c.name : *p;
// never handle deletes here, do them at the end // never handle deletes here, do them at the end
@ -321,7 +339,6 @@ public:
toBeUpdated.push_back(c); toBeUpdated.push_back(c);
} else { } else {
// File/Directory exists and hash is valid. // File/Directory exists and hash is valid.
if (c.type == HTTPRepository::DirectoryType) { if (c.type == HTTPRepository::DirectoryType) {
// If it's a directory,perform a recursive check. // If it's a directory,perform a recursive check.
HTTPDirectory *childDir = childDirectory(c.name); HTTPDirectory *childDir = childDirectory(c.name);
@ -510,42 +527,42 @@ public:
_repository->totalDownloaded += sz; _repository->totalDownloaded += sz;
SGPath p = SGPath(absolutePath(), file); SGPath p = SGPath(absolutePath(), file);
if ((p.extension() == "tgz") || (p.extension() == "zip")) { if (it->type == HTTPRepository::TarballType) {
// We require that any compressed files have the same filename as the file or directory // We require that any compressed files have the same filename as the file or directory
// they expand to, so we can remove the old file/directory before extracting the new // they expand to, so we can remove the old file/directory before extracting the new
// data. // data.
SGPath removePath = SGPath(p.base()); SGPath removePath = SGPath(p.base());
bool pathAvailable = true; bool pathAvailable = true;
if (removePath.exists()) { if (removePath.exists()) {
if (removePath.isDir()) { if (removePath.isDir()) {
simgear::Dir pd(removePath); simgear::Dir pd(removePath);
pathAvailable = pd.removeChildren(); pathAvailable = pd.removeChildren();
} else { } else {
pathAvailable = removePath.remove(); pathAvailable = removePath.remove();
}
} }
}
if (pathAvailable) { if (pathAvailable) {
// we use a Task helper to extract tarballs incrementally. // we use a Task helper to extract tarballs incrementally.
// without this, archive extraction blocks here, which // without this, archive extraction blocks here, which
// prevents other repositories downloading / updating. // prevents other repositories downloading / updating.
// Unfortunately due to Windows AV (Defender, etc) we can block // Unfortunately due to Windows AV (Defender, etc) we can block
// here for many minutes. // here for many minutes.
// use a lambda to own this shared_ptr; this means when the // use a lambda to own this shared_ptr; this means when the
// lambda is destroyed, the ArchiveExtractTask will get // lambda is destroyed, the ArchiveExtractTask will get
// cleaned up. // cleaned up.
ArchiveExtractTaskPtr t = ArchiveExtractTaskPtr t =
std::make_shared<ArchiveExtractTask>(p, _relativePath); std::make_shared<ArchiveExtractTask>(p, _relativePath);
auto cb = [t](HTTPRepoPrivate *repo) { auto cb = [t](HTTPRepoPrivate* repo) {
return t->run(repo); return t->run(repo);
}; };
_repository->addTask(cb); _repository->addTask(cb);
} else { } else {
SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to remove old file/directory " << removePath); SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to remove old file/directory " << removePath);
} // of pathAvailable } // of pathAvailable
} // of handling tgz files } // of handling archive files
} // of hash matches } // of hash matches
} // of found in child list } // of found in child list
} }
@ -737,11 +754,9 @@ private:
/// Return the hash for @a child by passing a path built from child.path to
/// hashForPath(). For a DirectoryType child, ".dirindex" is appended to the
/// path first.
/// NOTE(review): the lines below are a side-by-side diff (before | after).
/// The "before" column additionally concatenated ".tgz" for TarballType
/// children; the "after" column hashes the child's path unchanged.
std::string hashForChild(const ChildInfo& child) const std::string hashForChild(const ChildInfo& child) const
{ {
SGPath p(child.path); SGPath p(child.path);
if (child.type == HTTPRepository::DirectoryType) if (child.type == HTTPRepository::DirectoryType) {
p.append(".dirindex"); p.append(".dirindex");
if (child.type == HTTPRepository::TarballType) }
p.concat(
".tgz"); // For tarballs the hash is against the tarball file itself
return hashForPath(p); return hashForPath(p);
} }