HTTPRepository: improving handling of archives

Avoid hard-coding the archive extension, and ensure the extracted
archive directory is not orphaned on update. Finally, use the
literal filename in the .dirindex when computing the hash, rather
than adding a .tgz extension.
James Turner 2021-01-08 19:39:14 +00:00
parent 161f76508d
commit 9fbf56004b
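Before the diff itself, a minimal standalone sketch of the orphan-handling point from the commit message: the .dirindex lists the archive (e.g. fooBar.tgz), but on disk the archive is extracted to a sibling directory fooBar, which would otherwise look like an unlisted orphan and be deleted on every update. FakePath, FakePathList and the extension-stripping line below are illustrative stand-ins for SimGear's SGPath, PathList and SGPath::file_base(); they are not part of this commit.

    // Illustrative sketch only -- plain C++, no SimGear types.
    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for SGPath / PathList.
    struct FakePath {
        std::string name;
        bool isDir;
    };
    using FakePathList = std::vector<FakePath>;

    // Drop the directory an archive such as "fooBar.tgz" or "fooBar.zip"
    // extracts to ("fooBar") from the candidate-orphan list, mirroring the
    // intent of removeExtractedDirectoryFromList() added in this commit.
    void removeExtractedDirectory(FakePathList& orphans, const std::string& tarballName)
    {
        const std::string directoryName = tarballName.substr(0, tarballName.find('.'));
        auto it = std::find_if(orphans.begin(), orphans.end(),
                               [&directoryName](const FakePath& p) {
                                   return p.isDir && (p.name == directoryName);
                               });
        if (it != orphans.end()) {
            orphans.erase(it);
        }
    }

    int main()
    {
        // "fooBar" holds the extracted archive contents; "stale.txt" is a real orphan.
        FakePathList orphans = {{"fooBar", true}, {"stale.txt", false}};
        removeExtractedDirectory(orphans, "fooBar.tgz");
        for (const auto& p : orphans) {
            std::cout << p.name << '\n'; // prints only "stale.txt"
        }
        return 0;
    }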

@@ -266,6 +266,19 @@ public:
         free(buf);
     }
 
+    /// helper to check and erase 'fooBar' from paths, if passed fooBar.zip, fooBar.tgz, etc.
+    void removeExtractedDirectoryFromList(PathList& paths, const std::string& tarballName)
+    {
+        const auto directoryName = SGPath::fromUtf8(tarballName).file_base();
+        auto it = std::find_if(paths.begin(), paths.end(), [directoryName](const SGPath& p) {
+            return p.isDir() && (p.file() == directoryName);
+        });
+
+        if (it != paths.end()) {
+            paths.erase(it);
+        }
+    }
+
     void updateChildrenBasedOnHash()
     {
         using SAct = HTTPRepository::SyncAction;
@@ -299,6 +312,11 @@ public:
                               orphans.end());
             }
 
+            // ensure the extracted directory corresponding to a tarball, is *not* considered an orphan
+            if (c.type == HTTPRepository::TarballType) {
+                removeExtractedDirectoryFromList(orphans, c.name);
+            }
+
             if (_repository->syncPredicate) {
                 const auto pathOnDisk = isNew ? absolutePath() / c.name : *p;
                 // never handle deletes here, do them at the end
@@ -321,7 +339,6 @@ public:
                     toBeUpdated.push_back(c);
                 } else {
                     // File/Directory exists and hash is valid.
-
                     if (c.type == HTTPRepository::DirectoryType) {
                         // If it's a directory,perform a recursive check.
                         HTTPDirectory *childDir = childDirectory(c.name);
@@ -510,7 +527,7 @@ public:
             _repository->totalDownloaded += sz;
             SGPath p = SGPath(absolutePath(), file);
-            if ((p.extension() == "tgz") || (p.extension() == "zip")) {
+            if (it->type == HTTPRepository::TarballType) {
                 // We require that any compressed files have the same filename as the file or directory
                 // they expand to, so we can remove the old file/directory before extracting the new
                 // data.
@@ -537,7 +554,7 @@ public:
                 // cleaned up.
                 ArchiveExtractTaskPtr t =
                     std::make_shared<ArchiveExtractTask>(p, _relativePath);
-                auto cb = [t](HTTPRepoPrivate *repo) {
+                auto cb = [t](HTTPRepoPrivate* repo) {
                     return t->run(repo);
                 };
@@ -545,7 +562,7 @@ public:
                 } else {
                     SG_LOG(SG_TERRASYNC, SG_ALERT, "Unable to remove old file/directory " << removePath);
                 } // of pathAvailable
-            } // of handling tgz files
+            } // of handling archive files
         } // of hash matches
     } // of found in child list
 }
@@ -737,11 +754,9 @@ private:
     std::string hashForChild(const ChildInfo& child) const
     {
         SGPath p(child.path);
-        if (child.type == HTTPRepository::DirectoryType)
+        if (child.type == HTTPRepository::DirectoryType) {
             p.append(".dirindex");
-        if (child.type == HTTPRepository::TarballType)
-            p.concat(
-                ".tgz"); // For tarballs the hash is against the tarball file itself
+        }
         return hashForPath(p);
     }
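
To round off the last hunk, a small sketch of the hash-lookup change from the commit message: previously a hard-coded ".tgz" suffix was concatenated before hashing a tarball child, while now the filename is hashed exactly as it appears in the .dirindex, so other archive types such as .zip are handled consistently. The child name and the hashFor() helper below are hypothetical stand-ins, not SimGear API.

    // Illustrative sketch only -- "fooBar.zip" is a hypothetical child name;
    // hashFor() stands in for the repository's real hash lookup by path.
    #include <iostream>
    #include <string>

    static std::string hashFor(const std::string& path)
    {
        // Stand-in: the real code looks the path up against the .dirindex hashes.
        return "<hash of " + path + ">";
    }

    int main()
    {
        // Path of a tarball child, carrying the literal filename from the .dirindex.
        const std::string childPath = "Objects/fooBar.zip";

        // Old behaviour: a hard-coded ".tgz" suffix was concatenated first.
        std::cout << "old: " << hashFor(childPath + ".tgz") << '\n';

        // New behaviour: the filename is hashed exactly as listed.
        std::cout << "new: " << hashFor(childPath) << '\n';
        return 0;
    }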