simgear/simgear/io/HTTPRepository.cxx
2018-09-24 14:54:20 +01:00

1174 lines
36 KiB
C++

// HTTPRepository.cxx -- plain HTTP TerraSync remote client
//
// Copyright (C) 2016 James Turner <zakalawe@mac.com>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include <simgear_config.h>
#include "HTTPRepository.hxx"
#include <iostream>
#include <cassert>
#include <algorithm>
#include <sstream>
#include <map>
#include <set>
#include <fstream>
#include <limits>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
#include <fcntl.h>
#include "simgear/debug/logstream.hxx"
#include "simgear/misc/strutils.hxx"
#include <simgear/misc/sg_dir.hxx>
#include <simgear/io/HTTPClient.hxx>
#include <simgear/io/sg_file.hxx>
#include <simgear/io/iostreams/sgstream.hxx>
#include <simgear/structure/exception.hxx>
#include <simgear/timing/timestamp.hxx>
#include <simgear/misc/sg_hash.hxx>
namespace simgear
{
class HTTPDirectory;
/**
 * Common base for the GET requests issued by the repository. It remembers
 * the directory the request was made for and the body size the caller
 * expects (defaults to zero until setContentSize() is called).
 */
class HTTPRepoGetRequest : public HTTP::Request
{
public:
    HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u)
        : HTTP::Request(u), _directory(d)
    {
    }

    virtual void cancel();

    /// Expected body size in bytes, as set through setContentSize().
    size_t contentSize() const { return _contentSize; }

    /// Record the expected body size in bytes.
    void setContentSize(size_t sz) { _contentSize = sz; }

protected:
    HTTPDirectory* _directory;
    size_t _contentSize = 0;
};
typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
/**
 * Translate a repository result code into a short human readable string
 * suitable for log messages.
 *
 * @param code the result code to describe
 * @return a description, or "Unknown response code" for values that are
 *         not handled by the switch below
 */
std::string innerResultCodeAsString(HTTPRepository::ResultCode code)
{
    switch (code) {
    case HTTPRepository::REPO_NO_ERROR: return "no error";
    case HTTPRepository::REPO_ERROR_NOT_FOUND: return "not found";
    case HTTPRepository::REPO_ERROR_SOCKET: return "socket error";
    case HTTPRepository::SVN_ERROR_XML: return "malformed XML";
    // previously shared the "malformed XML" text with SVN_ERROR_XML, which
    // made the two codes indistinguishable in the logs
    case HTTPRepository::SVN_ERROR_TXDELTA: return "malformed TX delta";
    case HTTPRepository::REPO_ERROR_IO: return "I/O error";
    case HTTPRepository::REPO_ERROR_CHECKSUM: return "checksum verification error";
    case HTTPRepository::REPO_ERROR_FILE_NOT_FOUND: return "file not found";
    case HTTPRepository::REPO_ERROR_HTTP: return "HTTP-level error";
    case HTTPRepository::REPO_ERROR_CANCELLED: return "cancelled";
    case HTTPRepository::REPO_PARTIAL_UPDATE: return "partial update (incomplete)";
    }
    return "Unknown response code";
}
class HTTPRepoPrivate
{
public:
struct HashCacheEntry
{
std::string filePath;
time_t modTime;
size_t lengthBytes;
std::string hashHex;
};
typedef std::vector<HashCacheEntry> HashCache;
HashCache hashes;
bool hashCacheDirty;
struct Failure
{
SGPath path;
HTTPRepository::ResultCode error;
};
typedef std::vector<Failure> FailureList;
FailureList failures;
HTTPRepoPrivate(HTTPRepository* parent) :
hashCacheDirty(false),
p(parent),
isUpdating(false),
status(HTTPRepository::REPO_NO_ERROR),
totalDownloaded(0)
{ ; }
~HTTPRepoPrivate();
HTTPRepository* p; // link back to outer
HTTP::Client* http;
std::string baseUrl;
SGPath basePath;
bool isUpdating;
HTTPRepository::ResultCode status;
HTTPDirectory* rootDir;
size_t totalDownloaded;
HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
size_t sz);
HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
size_t sz);
std::string hashForPath(const SGPath& p);
void updatedFileContents(const SGPath& p, const std::string& newHash);
void parseHashCache();
std::string computeHashForPath(const SGPath& p);
void writeHashCache();
void failedToGetRootIndex(HTTPRepository::ResultCode st);
void failedToUpdateChild(const SGPath& relativePath,
HTTPRepository::ResultCode fileStatus);
typedef std::vector<RepoRequestPtr> RequestVector;
RequestVector queuedRequests,
activeRequests;
void makeRequest(RepoRequestPtr req);
void finishedRequest(const RepoRequestPtr& req);
HTTPDirectory* getOrCreateDirectory(const std::string& path);
bool deleteDirectory(const std::string& relPath, const SGPath& absPath);
typedef std::vector<HTTPDirectory*> DirectoryVector;
DirectoryVector directories;
SGPath installedCopyPath;
};
class HTTPDirectory
{
/// One entry of a parsed .dirindex: a file or sub-directory of this directory.
struct ChildInfo
{
    enum Type { FileType, DirectoryType };

    ChildInfo(Type ty, const std::string& nameData, const std::string& hashData)
        : type(ty), name(nameData), hash(hashData)
    {
    }

    ChildInfo(const ChildInfo& other) = default;

    /// Parse a decimal size string and store it in sizeInBytes.
    void setSize(const std::string& sizeData)
    {
        const char* digits = sizeData.c_str();
        sizeInBytes = ::strtol(digits, NULL, 10);
    }

    /// Order children by name only.
    bool operator<(const ChildInfo& other) const { return name < other.name; }

    Type type;
    std::string name, hash;
    size_t sizeInBytes = 0;
    SGPath path; // absolute path on disk
};
typedef std::vector<ChildInfo> ChildInfoList;
ChildInfoList children;
public:
/// Create the directory object and, when the directory is already present
/// on disk, load its child list from the cached .dirindex.
HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
    _repository(repo),
    _relativePath(path)
{
    assert(repo);

    SGPath onDisk(absolutePath());
    if (!onDisk.exists()) {
        return; // nothing cached yet
    }

    try {
        parseDirIndex(children);
        std::sort(children.begin(), children.end());
    } catch (sg_exception&) {
        // the cached index could not be parsed: start with no children
        children.clear();
    }
}
/// Return the private repository state this directory was created with.
HTTPRepoPrivate* repository() const
{
return _repository;
}
/// URL of this directory: the repository base URL, followed by the
/// relative path for anything other than the root directory.
std::string url() const
{
    const std::string& base = _repository->baseUrl;
    return _relativePath.empty() ? base : base + "/" + _relativePath;
}
void dirIndexUpdated(const std::string& hash)
{
SGPath fpath(absolutePath());
fpath.append(".dirindex");
_repository->updatedFileContents(fpath, hash);
children.clear();
parseDirIndex(children);
std::sort(children.begin(), children.end());
}
/// Report that this directory could not be updated. The root directory
/// (empty relative path) is reported through failedToGetRootIndex(), any
/// other directory through failedToUpdateChild().
void failedToUpdate(HTTPRepository::ResultCode status)
{
    const bool isRoot = _relativePath.empty();
    if (isRoot) {
        _repository->failedToGetRootIndex(status);
        return;
    }

    _repository->failedToUpdateChild(_relativePath, status);
}
void copyInstalledChildren()
{
if (_repository->installedCopyPath.isNull()) {
return;
}
char* buf = nullptr;
size_t bufSize = 0;
for (const auto& child : children) {
if (child.type != ChildInfo::FileType)
continue;
if (child.path.exists())
continue;
SGPath cp = _repository->installedCopyPath;
cp.append(relativePath());
cp.append(child.name);
if (!cp.exists()) {
continue;
}
SGBinaryFile src(cp);
SGBinaryFile dst(child.path);
src.open(SG_IO_IN);
dst.open(SG_IO_OUT);
if (bufSize < cp.sizeInBytes()) {
bufSize = cp.sizeInBytes();
free(buf);
buf = (char*) malloc(bufSize);
if (!buf) {
continue;
}
}
src.read(buf, cp.sizeInBytes());
dst.write(buf, cp.sizeInBytes());
src.close();
dst.close();
}
free(buf);
}
void updateChildrenBasedOnHash()
{
//SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
copyInstalledChildren();
string_list toBeUpdated, orphans,
indexNames = indexChildren();
simgear::Dir d(absolutePath());
PathList fsChildren = d.children(0);
for (const auto& child : fsChildren) {
const auto& fileName = child.file();
if ((fileName == ".dirindex") || (fileName == ".hashes")) {
continue;
}
ChildInfo info(child.isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
fileName, "");
info.path = child;
std::string hash = hashForChild(info);
ChildInfoList::iterator c = findIndexChild(fileName);
if (c == children.end()) {
orphans.push_back(fileName);
} else if (c->hash != hash) {
#if 0
SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << fileName);
// file exists, but hash mismatch, schedule update
if (!hash.empty()) {
SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << fileName);
SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
}
#endif
toBeUpdated.push_back(fileName);
} else {
// file exists and hash is valid. If it's a directory,
// perform a recursive check.
if (c->type == ChildInfo::DirectoryType) {
HTTPDirectory* childDir = childDirectory(fileName);
childDir->updateChildrenBasedOnHash();
}
}
// remove existing file system children from the index list,
// so we can detect new children
// https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), fileName), indexNames.end());
} // of real children iteration
// all remaining names in indexChilden are new children
toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
removeOrphans(orphans);
scheduleUpdates(toBeUpdated);
}
/// Look up (creating it if needed) the directory object for the child
/// called 'name'. The child's relative path is this directory's relative
/// path plus "/name", or just 'name' for children of the root.
HTTPDirectory* childDirectory(const std::string& name)
{
    const std::string parent = relativePath();
    if (parent.empty()) {
        return _repository->getOrCreateDirectory(name);
    }

    return _repository->getOrCreateDirectory(parent + "/" + name);
}
void removeOrphans(const string_list& orphans)
{
string_list::const_iterator it;
for (it = orphans.begin(); it != orphans.end(); ++it) {
removeChild(*it);
}
}
/// Names of all children listed in the parsed index, in index order.
string_list indexChildren() const
{
    string_list names;
    names.reserve(children.size());
    for (const auto& entry : children) {
        names.push_back(entry.name);
    }
    return names;
}
void scheduleUpdates(const string_list& names)
{
string_list::const_iterator it;
for (it = names.begin(); it != names.end(); ++it) {
ChildInfoList::iterator cit = findIndexChild(*it);
if (cit == children.end()) {
SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
continue;
}
if (cit->type == ChildInfo::FileType) {
_repository->updateFile(this, *it, cit->sizeInBytes);
} else {
HTTPDirectory* childDir = childDirectory(*it);
_repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
}
}
}
/// Location of this directory on disk: the repository base path with the
/// relative path appended.
SGPath absolutePath() const
{
    SGPath location(_repository->basePath);
    location.append(_relativePath);
    return location;
}
/// Path of this directory relative to the repository root; empty for the
/// root directory itself.
std::string relativePath() const
{
return _relativePath;
}
/// Called after a file download completed. When the downloaded hash
/// matches the index entry the hash cache and the download counter are
/// updated; a mismatch is reported as a checksum failure.
void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
{
    auto entry = findIndexChild(file);
    if (entry == children.end()) {
        SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
        return;
    }

    if (entry->hash != hash) {
        // the file is deliberately left on disk after a hash mismatch: if
        // we are syncing in the middle of a server-side update, the
        // downloaded file may actually become valid.
        _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
        return;
    }

    // hash matches what the index promised
    _repository->updatedFileContents(entry->path, hash);
    _repository->totalDownloaded += sz;
}
void didFailToUpdateFile(const std::string& file,
HTTPRepository::ResultCode status)
{
SGPath fpath(_relativePath);
fpath.append(file);
_repository->failedToUpdateChild(fpath, status);
}
private:
/// Predicate matching the child whose name equals the one given at
/// construction.
struct ChildWithName
{
    ChildWithName(const std::string& n)
        : name(n)
    {
    }

    std::string name;

    bool operator()(const ChildInfo& info) const
    {
        return info.name == name;
    }
};
/// Find the index entry called 'name'; returns children.end() when there
/// is no such entry.
ChildInfoList::iterator findIndexChild(const std::string& name)
{
    const auto hasName = [&name](const ChildInfo& info) {
        return info.name == name;
    };
    return std::find_if(children.begin(), children.end(), hasName);
}
/// Parse this directory's .dirindex file and append one ChildInfo per
/// valid 'f' / 'd' line to 'children'.
///
/// Malformed lines are logged and skipped; a missing or unsupported
/// version line stops parsing (entries parsed so far are kept).
///
/// @param children list that receives the parsed entries
/// @return false when there is no .dirindex on disk, true otherwise
/// @throws sg_io_exception when the file exists but cannot be opened
bool parseDirIndex(ChildInfoList& children)
{
    SGPath p(absolutePath());
    p.append(".dirindex");
    if (!p.exists()) {
        return false;
    }

    sg_ifstream indexStream(p, std::ios::in );
    if ( !indexStream.is_open() ) {
        throw sg_io_exception("cannot open dirIndex file", p);
    }

    // loop on the result of getline rather than on eof(): a read error
    // sets failbit/badbit without eofbit, which would otherwise spin forever
    std::string line;
    while (std::getline(indexStream, line)) {
        line = simgear::strutils::strip(line);

        // skip blank line or comment beginning with '#'
        if( line.empty() || line[0] == '#' )
            continue;

        string_list tokens = simgear::strutils::split( line, ":" );
        if (tokens.empty()) {
            continue;
        }

        const std::string typeData = tokens[0];

        if( typeData == "version" ) {
            if( tokens.size() < 2 ) {
                SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'"
                       << "\n\tparsing:" << p.utf8Str());
                break;
            }
            if( tokens[1] != "1" ) {
                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)"
                       << "\n\tparsing:" << p.utf8Str());
                break;
            }
            continue; // version is good, continue
        }

        if( typeData == "path" ) {
            continue; // ignore path, next line
        }

        if( typeData == "time" && tokens.size() > 1 ) {
            continue; // timestamp is not used
        }

        if( tokens.size() < 3 ) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)"
                   << "\n\tparsing:" << p.utf8Str());
            continue;
        }

        if (typeData != "f" && typeData != "d" ) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)"
                   << "\n\tparsing:" << p.utf8Str());
            continue;
        }

        // security: prevent writing outside the repository via ../../.. filenames
        // (valid filenames never contain / - subdirectories have their own .dirindex).
        // An empty name or "." would resolve to this directory itself, so
        // those are rejected as well.
        const std::string& childName = tokens[1];
        const bool badName = childName.empty() || (childName == ".") || (childName == "..")
            || (childName.find_first_of("/\\") != std::string::npos);
        if (badName) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid filename in line '" << line << "', (ignoring line)"
                   << "\n\tparsing:" << p.utf8Str());
            continue;
        }

        children.emplace_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, childName, tokens[2]));
        children.back().path = absolutePath() / childName;
        if (tokens.size() > 3) {
            children.back().setSize(tokens[3]);
        }
    }

    return true;
}
/// Remove the child called 'name' from disk. Directories are removed
/// through the repository (which also drops the matching directory
/// object); files additionally lose their hash cache entry.
///
/// @throws sg_io_exception when the removal fails
void removeChild(const std::string& name)
{
    SGPath p(absolutePath());
    p.append(name);
    bool ok = false;

    if (p.isDir()) {
        // build the relative path exactly like childDirectory() does:
        // children of the root are registered as "name", not "/name",
        // so prefixing a slash would never match the tracked directory.
        const std::string fpath = _relativePath.empty() ? name : _relativePath + "/" + name;
        ok = _repository->deleteDirectory(fpath, p);
    } else {
        // remove the hash cache entry
        _repository->updatedFileContents(p, std::string());
        ok = p.remove();
    }

    if (!ok) {
        SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
        throw sg_io_exception("Failed to remove existing file/dir:", p);
    }
}
std::string hashForChild(const ChildInfo& child) const
{
SGPath p(child.path);
if (child.type == ChildInfo::DirectoryType) {
p.append(".dirindex");
}
return _repository->hashForPath(p);
}
HTTPRepoPrivate* _repository;
std::string _relativePath; // in URL and file-system space
};
/// Create a repository stored under 'base' that downloads through 'cl'.
/// The root directory object is created and the hash cache is loaded
/// from disk.
HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
    _d(new HTTPRepoPrivate(this))
{
    HTTPRepoPrivate* priv = _d.get();
    priv->http = cl;
    priv->basePath = base;
    priv->rootDir = new HTTPDirectory(priv, "");
    priv->parseHashCache();
}
// Nothing to do explicitly here; the body is intentionally empty.
HTTPRepository::~HTTPRepository()
{
}
// Set the URL every directory URL is built from (see HTTPDirectory::url()).
void HTTPRepository::setBaseUrl(const std::string &url)
{
_d->baseUrl = url;
}
// Return the base URL previously set with setBaseUrl().
std::string HTTPRepository::baseUrl() const
{
return _d->baseUrl;
}
// Return the HTTP client passed to the constructor.
HTTP::Client* HTTPRepository::http() const
{
return _d->http;
}
// Returns a default-constructed SGPath.
// NOTE(review): the name suggests this should return the base path given to
// the constructor (_d->basePath) - confirm against callers before changing.
SGPath HTTPRepository::fsBase() const
{
return SGPath();
}
// Start an update of the whole repository by requesting the root directory
// index. Does nothing when an update is already in progress.
void HTTPRepository::update()
{
    if (!_d->isUpdating) {
        // reset the state left behind by the previous update
        _d->status = REPO_NO_ERROR;
        _d->isUpdating = true;
        _d->failures.clear();

        _d->updateDir(_d->rootDir, std::string(), 0);
    }
}
// True while an update is running and no repository-level error has been
// recorded.
bool HTTPRepository::isDoingSync() const
{
    const bool healthy = (_d->status == REPO_NO_ERROR);
    return healthy && _d->isUpdating;
}
// Estimate the number of bytes still to be downloaded: the full expected
// size of every queued request plus the outstanding part of every active
// request.
size_t HTTPRepository::bytesToDownload() const
{
    size_t result = 0;

    for (const auto& queued : _d->queuedRequests) {
        result += queued->contentSize();
    }

    for (const auto& active : _d->activeRequests) {
        const size_t expected = active->contentSize();
        const size_t received = active->responseBytesReceived();
        // Only count a positive remainder. The expected size is zero for
        // the root dirindex and may be smaller than what the server
        // actually sent; subtracting unconditionally would wrap around to
        // a huge unsigned value.
        if (expected > received) {
            result += expected - received;
        }
    }

    return result;
}
// Bytes downloaded so far: the total of completed requests plus whatever
// the active requests have received up to now.
size_t HTTPRepository::bytesDownloaded() const
{
    size_t downloaded = _d->totalDownloaded;
    for (const auto& active : _d->activeRequests) {
        downloaded += active->responseBytesReceived();
    }
    return downloaded;
}
// Set the directory tree that HTTPDirectory::copyInstalledChildren() copies
// missing files from before downloading.
void HTTPRepository::setInstalledCopyPath(const SGPath& copyPath)
{
_d->installedCopyPath = copyPath;
}
// Public wrapper around innerResultCodeAsString().
std::string HTTPRepository::resultCodeAsString(ResultCode code)
{
return innerResultCodeAsString(code);
}
// Overall result of the update: the repository status, or
// REPO_PARTIAL_UPDATE when the status is fine but individual entries failed.
HTTPRepository::ResultCode
HTTPRepository::failure() const
{
    const bool statusOk = (_d->status == REPO_NO_ERROR);
    const bool someEntriesFailed = statusOk && !_d->failures.empty();
    return someEntriesFailed ? REPO_PARTIAL_UPDATE : _d->status;
}
void HTTPRepoGetRequest::cancel()
{
_directory->repository()->http->cancelRequest(this, "Repository cancelled");
_directory = 0;
}
/**
 * Request downloading a single file into its directory. The body is
 * streamed to disk as it arrives and hashed on the fly; on completion the
 * directory is told the resulting hash (or the failure).
 */
class FileGetRequest : public HTTPRepoGetRequest
{
public:
    FileGetRequest(HTTPDirectory* d, const std::string& file) :
        HTTPRepoGetRequest(d, makeUrl(d, file)),
        fileName(file)
    {
        pathInRepo = _directory->absolutePath();
        pathInRepo.append(fileName);
        // initialise the hash up front so onDone() can always finalise it,
        // even when the response carries no body at all
        sha1_init(&hashContext);
    }

protected:
    virtual void gotBodyData(const char* s, int n)
    {
        if (outputFailed) {
            return; // the request is already being cancelled
        }

        if (!file.get()) {
            file.reset(new SGBinaryFile(pathInRepo));
            if (!file->open(SG_IO_OUT)) {
                SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
                // set the flag and drop the file before cancelling: the
                // cancellation may run onFail(), and nothing must be
                // written to the unopened file afterwards
                outputFailed = true;
                file.reset();
                _directory->repository()->http->cancelRequest(this, "Unable to create output file:" + pathInRepo.utf8Str());
                return;
            }
        }

        sha1_write(&hashContext, s, n);
        file->write(s, n);
    }

    virtual void onDone()
    {
        // no body data means gotBodyData() never created the file
        if (file) {
            file->close();
        }

        if (responseCode() == 200) {
            std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
            _directory->didUpdateFile(fileName, hash, contentSize());
        } else if (responseCode() == 404) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
            _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
        } else {
            SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() <<
                   "\n\tserver responded: " << responseCode() << "/" << responseReason());
            _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
        }

        _directory->repository()->finishedRequest(this);
    }

    virtual void onFail()
    {
        HTTPRepository::ResultCode code = HTTPRepository::REPO_ERROR_SOCKET;
        if (responseCode() == -1) {
            code = HTTPRepository::REPO_ERROR_CANCELLED;
        }

        if (file) {
            file->close();
        }
        file.reset();

        // do not leave a partial download behind
        if (pathInRepo.exists()) {
            pathInRepo.remove();
        }

        // _directory is cleared by cancel()
        if (_directory) {
            _directory->didFailToUpdateFile(fileName, code);
            _directory->repository()->finishedRequest(this);
        }
    }

private:
    static std::string makeUrl(HTTPDirectory* d, const std::string& file)
    {
        return d->url() + "/" + file;
    }

    std::string fileName; // if empty, we're getting the directory itself
    SGPath pathInRepo;
    simgear::sha1nfo hashContext;
    std::unique_ptr<SGBinaryFile> file;
    bool outputFailed = false; ///< the output file could not be created
};
/**
 * Request downloading the .dirindex of a directory. The body is collected
 * in memory and hashed; on success the index is written to disk (when it
 * changed) and the directory's children are brought up to date.
 */
class DirGetRequest : public HTTPRepoGetRequest
{
public:
    DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
        HTTPRepoGetRequest(d, makeUrl(d)),
        _isRootDir(false),
        _targetHash(targetHash)
    {
        sha1_init(&hashContext);
    }

    void setIsRootDir()
    {
        _isRootDir = true;
    }

    bool isRootDir() const
    {
        return _isRootDir;
    }

protected:
    virtual void gotBodyData(const char* s, int n)
    {
        body.append(s, n);
        sha1_write(&hashContext, s, n);
    }

    virtual void onDone()
    {
        if (responseCode() == 200) {
            try {
                processIndex();
            } catch (sg_exception& ) {
                // Any I/O problem while storing or applying the index is
                // reported as a failure instead of escaping from the
                // callback: the request must always reach
                // finishedRequest() below, or it would stay in the active
                // list forever.
                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
            }
        } else if (responseCode() == 404) {
            _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
        } else {
            _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
        }

        _directory->repository()->finishedRequest(this);
    }

    virtual void onFail()
    {
        // _directory is cleared by cancel()
        if (_directory) {
            _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
            _directory->repository()->finishedRequest(this);
        }
    }

private:
    /// Verify the downloaded index against the expected hash, store it on
    /// disk if it changed and update the directory's children. Throws
    /// sg_exception (or a subclass) on I/O failure.
    void processIndex()
    {
        std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
        if (!_targetHash.empty() && (hash != _targetHash)) {
            _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
            return;
        }

        // NOTE(review): path() is not the pathInRepo() helper below; it looks
        // like the on-disk index path was intended here - confirm before
        // changing, since that alters when the index is rewritten.
        std::string curHash = _directory->repository()->hashForPath(path());
        if (hash != curHash) {
            simgear::Dir d(_directory->absolutePath());
            if (!d.exists()) {
                if (!d.create(0700)) {
                    throw sg_io_exception("Unable to create directory", d.path());
                }
            }

            // dir index data has changed, so write to disk and update
            // the hash accordingly
            sg_ofstream of(pathInRepo(), std::ios::trunc | std::ios::out | std::ios::binary);
            if (!of.is_open()) {
                throw sg_io_exception("Failed to open directory index file for writing", pathInRepo());
            }

            of.write(body.data(), body.size());
            of.close();
            _directory->dirIndexUpdated(hash);
        }

        _directory->repository()->totalDownloaded += contentSize();

        // either way we've confirmed the index is valid so update
        // children now
        SGTimeStamp st;
        st.stamp();
        _directory->updateChildrenBasedOnHash();
        SG_LOG(SG_TERRASYNC, SG_DEBUG, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
    }

    static std::string makeUrl(HTTPDirectory* d)
    {
        return d->url() + "/.dirindex";
    }

    SGPath pathInRepo() const
    {
        SGPath p(_directory->absolutePath());
        p.append(".dirindex");
        return p;
    }

    simgear::sha1nfo hashContext;
    std::string body;
    bool _isRootDir; ///< is this the repository root?
    std::string _targetHash;
};
// Cancel whatever is still in flight, then free every directory object
// owned by the repository.
HTTPRepoPrivate::~HTTPRepoPrivate()
{
    // take a copy since cancelRequest will fail the request and hence
    // remove it from activeRequests, invalidating any iterator to it.
    RequestVector copyOfActive(activeRequests);
    for (auto& request : copyOfActive) {
        http->cancelRequest(request, "Repository object deleted");
    }

    for (HTTPDirectory* dir : directories) {
        delete dir;
    }
    directories.clear();

    // the root directory is created by the HTTPRepository constructor and
    // is not part of 'directories', so it has to be released separately
    delete rootDir;
    rootDir = nullptr;
}
// Create a download request for file 'name' inside 'dir', tag it with the
// expected size and hand it to makeRequest().
HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
{
    RepoRequestPtr fileRequest(new FileGetRequest(dir, name));
    fileRequest->setContentSize(sz);
    makeRequest(fileRequest);
    return fileRequest;
}
// Create a request for the index of 'dir' (expected to hash to 'hash' when
// that is non-empty), tag it with the expected size and hand it to
// makeRequest().
HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
{
    RepoRequestPtr indexRequest(new DirGetRequest(dir, hash));
    indexRequest->setContentSize(sz);
    makeRequest(indexRequest);
    return indexRequest;
}
/// Predicate matching the hash cache entry recorded for a given path
/// (compared by its UTF-8 string form).
class HashEntryWithPath
{
public:
    HashEntryWithPath(const SGPath& p)
        : path(p.utf8Str())
    {
    }

    bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
    {
        return entry.filePath == path;
    }

private:
    std::string path;
};
std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
{
HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p));
if (it != hashes.end()) {
// ensure data on disk hasn't changed.
// we could also use the file type here if we were paranoid
if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
return it->hashHex;
}
// entry in the cache, but it's stale so remove and fall through
hashes.erase(it);
}
std::string hash = computeHashForPath(p);
updatedFileContents(p, hash);
return hash;
}
// Compute the SHA-1 of the file at 'p' and return it hex encoded.
//
// Returns an empty string when the file does not exist.
// Throws sg_io_exception when the file exists but cannot be opened.
std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
{
    if (!p.exists())
        return std::string();

    // open before allocating anything, so the error path owns no resources
    SGBinaryFile f(p);
    if (!f.open(SG_IO_IN)) {
        throw sg_io_exception("Couldn't open file for compute hash", p);
    }

    sha1nfo info;
    sha1_init(&info);

    // the vector releases the buffer on every exit path
    std::vector<char> buf(1024 * 1024);
    for (;;) {
        const auto readLen = f.read(buf.data(), buf.size());
        if (readLen <= 0) {
            break; // end of file, or a read error
        }
        sha1_write(&info, buf.data(), static_cast<size_t>(readLen));
    }

    f.close();

    std::string hashBytes(reinterpret_cast<const char*>(sha1_result(&info)), HASH_LENGTH);
    return strutils::encodeHex(hashBytes);
}
void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
{
// remove the existing entry
HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p));
if (it != hashes.end()) {
hashes.erase(it);
hashCacheDirty = true;
}
if (newHash.empty()) {
return; // we're done
}
// use a cloned SGPath and reset its caching to force one stat() call
SGPath p2(p);
p2.set_cached(false);
p2.set_cached(true);
HashCacheEntry entry;
entry.filePath = p.utf8Str();
entry.hashHex = newHash;
entry.modTime = p2.modTime();
entry.lengthBytes = p2.sizeInBytes();
hashes.push_back(entry);
hashCacheDirty = true;
}
// Write the hash cache to ".hashes" under the base path, one
// '*'-separated entry per line. Does nothing when the cache is unchanged.
void HTTPRepoPrivate::writeHashCache()
{
    if (!hashCacheDirty) {
        return;
    }

    SGPath cachePath = basePath;
    cachePath.append(".hashes");

    sg_ofstream stream(cachePath, std::ios::out | std::ios::trunc | std::ios::binary);
    for (const auto& entry : hashes) {
        stream << entry.filePath << "*" << entry.modTime << "*"
               << entry.lengthBytes << "*" << entry.hashHex << "\n";
    }
    stream.close();

    hashCacheDirty = false;
}
// Load the hash cache from ".hashes" under the base path, replacing the
// in-memory cache. Each line has the form path*modTime*size*hash; blank
// lines, '#' comments and invalid entries are skipped. A missing or
// unreadable cache file simply leaves the cache empty.
void HTTPRepoPrivate::parseHashCache()
{
    hashes.clear();

    SGPath cachePath = basePath;
    cachePath.append(".hashes");
    if (!cachePath.exists()) {
        return;
    }

    sg_ifstream stream(cachePath, std::ios::in);
    if (!stream.is_open()) {
        SG_LOG(SG_TERRASYNC, SG_WARN, "unable to open hash cache '" << cachePath << "'");
        return;
    }

    // loop on the result of getline rather than on eof(): a failed read
    // never sets eofbit, which would otherwise spin forever
    std::string line;
    while (std::getline(stream, line)) {
        line = simgear::strutils::strip(line);
        if( line.empty() || line[0] == '#' )
            continue;

        string_list tokens = simgear::strutils::split(line, "*");
        if( tokens.size() < 4 ) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
            continue;
        }

        const std::string nameData = simgear::strutils::strip(tokens[0]);
        const std::string timeData = simgear::strutils::strip(tokens[1]);
        const std::string sizeData = simgear::strutils::strip(tokens[2]);
        const std::string hashData = simgear::strutils::strip(tokens[3]);

        if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
            SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath << "': '" << line << "' (ignoring line)");
            continue;
        }

        HashCacheEntry entry;
        entry.filePath = nameData;
        entry.hashHex = hashData;
        entry.modTime = strtol(timeData.c_str(), NULL, 10);
        // sizes are unsigned and may exceed the range of 'long' on
        // platforms where it is 32 bits wide
        entry.lengthBytes = static_cast<size_t>(std::strtoull(sizeData.c_str(), NULL, 10));
        hashes.push_back(entry);
    }
}
/// Predicate matching the directory object with a given relative path.
class DirectoryWithPath
{
public:
    DirectoryWithPath(const std::string& p)
        : path(p)
    {
    }

    bool operator()(const HTTPDirectory* entry) const
    {
        return entry->relativePath() == path;
    }

private:
    std::string path;
};
// Return the directory object for relative path 'path', creating and
// registering a new one when none exists yet.
HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
{
    const auto known = std::find_if(directories.begin(), directories.end(),
                                    DirectoryWithPath(path));
    if (known != directories.end()) {
        return *known;
    }

    HTTPDirectory* created = new HTTPDirectory(this, path);
    directories.push_back(created);
    return created;
}
bool HTTPRepoPrivate::deleteDirectory(const std::string& relPath, const SGPath& absPath)
{
DirectoryWithPath p(relPath);
auto it = std::find_if(directories.begin(), directories.end(), p);
if (it != directories.end()) {
HTTPDirectory* d = *it;
assert(d->absolutePath() == absPath);
directories.erase(it);
delete d;
} else {
// we encounter this code path when deleting an orphaned directory
}
Dir dir(absPath);
bool result = dir.remove(true);
// update the hash cache too
updatedFileContents(absPath, std::string());
return result;
}
// Start 'req' immediately unless more than four requests are already
// active, in which case it is queued until finishedRequest() picks it up.
void HTTPRepoPrivate::makeRequest(RepoRequestPtr req)
{
    const bool canStartNow = !(activeRequests.size() > 4);
    if (canStartNow) {
        activeRequests.push_back(req);
        http->makeRequest(req);
        return;
    }

    queuedRequests.push_back(req);
}
void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
{
RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
// in some cases, for example a checksum failure, we clear the active
// and queued request vectors, so the ::find above can fail
if (it != activeRequests.end()) {
activeRequests.erase(it);
}
if (!queuedRequests.empty()) {
RepoRequestPtr rr = queuedRequests.front();
queuedRequests.erase(queuedRequests.begin());
activeRequests.push_back(rr);
http->makeRequest(rr);
}
writeHashCache();
if (activeRequests.empty() && queuedRequests.empty()) {
isUpdating = false;
}
}
// The root index could not be retrieved: record the code as the status of
// the whole repository and log it.
void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st)
{
    SG_LOG(SG_TERRASYNC, SG_WARN,
           "Failed to get root of repo:" << baseUrl << " " << st);
    status = st;
}
void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
HTTPRepository::ResultCode fileStatus)
{
if (fileStatus == HTTPRepository::REPO_ERROR_CHECKSUM) {
// stop updating, and mark repository as failed, becuase this
// usually indicates we need to start a fresh update from the
// root.
// (we could issue a retry here, but we leave that to higher layers)
status = fileStatus;
queuedRequests.clear();
RequestVector copyOfActive(activeRequests);
RequestVector::iterator rq;
for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
http->cancelRequest(*rq, "Repository updated failed due to checksum error");
}
SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update repository:" << baseUrl
<< "\n\tchecksum failure for: " << relativePath
<< "\n\tthis typically indicates the remote repository is corrupt or was being updated during the sync");
} else if (fileStatus == HTTPRepository::REPO_ERROR_CANCELLED) {
// if we were cancelled, don't report or log
return;
}
Failure f;
f.path = relativePath;
f.error = fileStatus;
failures.push_back(f);
SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " status/code: "
<< innerResultCodeAsString(fileStatus) << "/" << fileStatus);
}
} // of namespace simgear