# -*- coding: utf-8 -*-

# virtual_path.py --- Classes used to manipulate slash-separated virtual paths
#
# Copyright (C) 2018  Florent Rougon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||
"""Module containing the VirtualPath and MutableVirtualPath classes."""
|
||
|
||
import pathlib
|
||
|
||
|
||
class VirtualPath:
    """Class used to represent virtual paths using the slash separator.

    This class always uses the slash ('/') as the separator between
    components. For terrasync.py, the root path '/' corresponds to the
    repository root, regardless of where it is stored (hard drive,
    remote server, etc.).

    Note: because of this, the class is not supposed to be used directly
          for filesystem accesses, since some root directory or
          protocol://server/root-dir prefix would have to be prepended
          to provide reasonably useful functionality. This is why the
          paths managed by this class are said to be virtual. This also
          implies that even in Python 3.6 or later, this class should
          *not* inherit from os.PathLike.

    Whenever a given feature exists in pathlib.PurePath, this class
    replicates the corresponding pathlib.PurePath API, but using
    mixedCaseStyle instead of underscore_style (the latter being used
    for every method of pathlib.PurePath). Of course, types are adapted:
    for instance, methods of this class often return a VirtualPath
    instance, whereas the corresponding pathlib.PurePath methods would
    return a pathlib.PurePath instance.

    Instances are hashable; the hash and the == operator both take the
    exact type of the instance into account (see __eq__() and
    samePath()).

    """

    def __init__(self, p):
        """Initialize the instance from the string 'p'.

        'p' is normalized with normalizeStringPath(): a leading '/' is
        added if missing, repeated slashes are collapsed and a trailing
        slash is removed (except for the root path '/').

        """
        # Once this function exits, self._path *must not be changed* anymore
        # (doing so would violate the contract for a hashable object: the
        # hash must not change once the object has been constructed).
        self._path = self.normalizeStringPath(p)
        # This check could of course be skipped if it is found to really affect
        # performance.
        self._check()

    def __str__(self):
        """Return a string representation of the path in self.

        The return value:
          - always starts with a '/';
          - never ends with a '/' except if it is exactly '/' (i.e.,
            the root virtual path).

        """
        return self._path

    def asPosix(self):
        """Return a string representation of the path in self.

        This method returns str(self), it is only present for
        compatibility with pathlib.PurePath.

        """
        return str(self)

    def __repr__(self):
        """Return 'module.ClassName(path)' for the instance."""
        return "{}.{}({!r})".format(__name__, type(self).__name__, self._path)

    def __lt__(self, other):
        """Compare the path strings; 'other' may be of any subclass."""
        # Allow sorting with instances of VirtualPath, or of any subclass. Note
        # that the == operator (__eq__()) and therefore also != are stricter
        # with respect to typing.
        if isinstance(other, VirtualPath):
            return self._path < other._path
        else:
            return NotImplemented

    def __le__(self, other):
        """Compare the path strings; 'other' may be of any subclass."""
        if isinstance(other, VirtualPath):
            return self._path <= other._path
        else:
            return NotImplemented

    def __eq__(self, other):
        """Return True if 'other' has the same exact type and the same path."""
        # The types must be the same, therefore a VirtualPath never compares
        # equal to a MutableVirtualPath with the == operator. For such
        # comparisons, use the samePath() method. If __eq__() (and thus
        # necessarily __hash__()) were more lax about typing, adding
        # VirtualPath instances and instances of hashable subclasses of
        # VirtualPath with the same _path to a set or frozenset would lead to
        # unintuitive behavior, since they would all be considered equal.
        return type(self) == type(other) and self._path == other._path

    # intentionally not implemented. Python3 provides a default implementation.
    # def __ne__(self, other):

    def __gt__(self, other):
        """Compare the path strings; 'other' may be of any subclass."""
        if isinstance(other, VirtualPath):
            return self._path > other._path
        else:
            return NotImplemented

    def __ge__(self, other):
        """Compare the path strings; 'other' may be of any subclass."""
        if isinstance(other, VirtualPath):
            return self._path >= other._path
        else:
            return NotImplemented

    def __hash__(self):
        """Return a hash built from the exact type and the path string."""
        # Be strict about typing, as for __eq__().
        return hash((type(self), self._path))

    def samePath(self, other):
        """Compare the path with another instance, possibly of a subclass.

        other -- instance of VirtualPath, or of a subclass of
                 VirtualPath

        Return True if both instances hold the same path string,
        regardless of their exact types. Raise TypeError if 'other' is
        not an instance of VirtualPath.

        """
        if isinstance(other, VirtualPath):
            return self._path == other._path
        else:
            raise TypeError("{obj!r} is of type {klass}, which is neither "
                            "VirtualPath nor a subclass thereof"
                            .format(obj=other, klass=type(other).__name__))

    def _check(self):
        """Run consistency checks on self.

        Assert that the stored path starts with exactly one '/' and
        doesn't end with '/' unless it is the root path.

        """
        assert (self._path.startswith('/') and not self._path.startswith('//')
                and (self._path == '/' or not self._path.endswith('/'))), \
                repr(self._path)

    @classmethod
    def normalizeStringPath(cls, path):
        """Normalize a string representing a virtual path.

        path -- input path (string)

        Return a string that always starts with a slash, never contains
        consecutive slashes and only ends with a slash if it's the root
        virtual path ('/').

        If 'path' doesn't start with a slash ('/'), it is considered
        relative to the root. This implies that if 'path' is the empty
        string, the return value is '/'.

        """
        if not path.startswith('/'):
            # / is the "virtual root" of the TerraSync repository
            path = '/' + path
        elif path.startswith('//') and not path.startswith('///'):
            # Nasty special case. As allowed (but not mandated!) by POSIX[1],
            # in pathlib.PurePosixPath('//some/path'), no collapsing happens[2].
            # This is only the case for exactly *two* *leading* slashes.
            # [1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap04.html#tag_04_11
            # [2] https://www.python.org/dev/peps/pep-0428/#construction
            path = path[1:]

        return pathlib.PurePosixPath(path).as_posix()

    def __truediv__(self, s):
        """Path concatenation with the '/' operator.

        's' must be a string representing a relative path using the '/'
        separator, for instance "dir/subdir/other-subdir". It must
        neither start nor end with a '/' (this is checked with an
        assertion).

        Return a new instance of type(self).

        """
        assert not (s.startswith('/') or s.endswith('/')), repr(s)

        if self._path == '/':
            return type(self)(self._path + s)
        else:
            return type(self)(self._path + '/' + s)

    def joinpath(self, *args):
        """Combine 'self' with each given string argument in turn.

        Each argument should be of the form "foo", "foo/bar",
        "foo/bar/baz", etc. Return the corresponding instance of
        type(self).

        >>> p = VirtualPath("/foo").joinpath("bar", "baz", "quux/zoot")
        >>> str(p)
        '/foo/bar/baz/quux/zoot'

        """
        return self / '/'.join(args)

    @property
    def name(self):
        """Return a string representing the final path component.

        For the root virtual path ('/'), this is the empty string.

        >>> p = VirtualPath("/foo/bar/baz")
        >>> p.name
        'baz'

        """
        pos = self._path.rfind('/')
        assert pos != -1, (pos, self._path)

        return self._path[pos+1:]

    @property
    def parts(self):
        """Return a tuple containing the path's components.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.parts
        ('/', 'usr', 'bin', 'python3')

        """
        if self._path == "/":
            return ('/',)
        else:
            # Skip the leading slash before splitting
            return ('/',) + tuple(self._path[1:].split('/'))

    def generateParents(self):
        """Generator function for the parents of the path.

        Yield instances of type(self), from the closest ancestor up to
        the root. Nothing is yielded if the path is the root.

        See the 'parents' property for details.

        """
        if self._path == '/':
            return

        assert self._path.startswith('/'), repr(self._path)
        prevPos = len(self._path)

        while True:
            # Find the last slash located before the previously found one
            pos = self._path.rfind('/', 0, prevPos)

            if pos > 0:
                yield type(self)(self._path[:pos])
                prevPos = pos
            else:
                # Only the leading slash is left
                assert pos == 0, pos
                break

        yield type(self)('/')

    @property
    def parents(self):
        """The path ancestors.

        Return an immutable sequence providing access to the logical
        ancestors of the path.

        >>> p = VirtualPath('/foo/bar/baz')
        >>> len(p.parents)
        3
        >>> p.parents[0]
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> p.parents[1]
        terrasync.virtual_path.VirtualPath('/foo')
        >>> p.parents[2]
        terrasync.virtual_path.VirtualPath('/')

        """
        return tuple(self.generateParents())

    @property
    def parent(self):
        """The logical parent of the path.

        The parent of the root virtual path is the root itself. The
        return value is an instance of type(self).

        >>> p = VirtualPath('/foo/bar/baz')
        >>> p.parent
        terrasync.virtual_path.VirtualPath('/foo/bar')
        >>> q = VirtualPath('/')
        >>> q.parent
        terrasync.virtual_path.VirtualPath('/')

        """
        pos = self._path.rfind('/')
        assert pos >= 0, pos

        if pos == 0:
            return type(self)('/')
        else:
            return type(self)(self._path[:pos])

    @property
    def suffix(self):
        """The extension of the final component, if any.

        This is the part of 'name' starting at its last '.', or the
        empty string if 'name' contains no '.'.

        >>> VirtualPath('/my/library/setup.py').suffix
        '.py'
        >>> VirtualPath('/my/library.tar.gz').suffix
        '.gz'
        >>> VirtualPath('/my/library').suffix
        ''

        """
        name = self.name
        pos = name.rfind('.')
        return name[pos:] if pos != -1 else ''

    @property
    def suffixes(self):
        """A list of the path's extensions.

        >>> VirtualPath('/my/library/setup.py').suffixes
        ['.py']
        >>> VirtualPath('/my/library.tar.gz').suffixes
        ['.tar', '.gz']
        >>> VirtualPath('/my/library').suffixes
        []

        """
        name = self.name
        prevPos = len(name)
        found = []

        # Scan the dots from right to left, then restore left-to-right order
        while True:
            pos = name.rfind('.', 0, prevPos)
            if pos == -1:
                break

            found.append(name[pos:prevPos])
            prevPos = pos

        found.reverse()
        return found

    @property
    def stem(self):
        """The final path component, without its suffix.

        >>> VirtualPath('/my/library.tar.gz').stem
        'library.tar'
        >>> VirtualPath('/my/library.tar').stem
        'library'
        >>> VirtualPath('/my/library').stem
        'library'
        >>> VirtualPath('/').stem
        ''

        """
        name = self.name
        pos = name.rfind('.')

        return name if pos == -1 else name[:pos]

    def asRelative(self):
        """Return the virtual path without its leading '/'.

        The return value is a string.

        >>> p = VirtualPath('/usr/bin/python3')
        >>> p.asRelative()
        'usr/bin/python3'

        >>> VirtualPath('').asRelative()
        ''
        >>> VirtualPath('/').asRelative()
        ''

        """
        assert self._path.startswith('/'), repr(self._path)
        return self._path[1:]

    def relativeTo(self, other):
        """Return the portion of this path that follows 'other'.

        other -- a string (normalized with normalizeStringPath()
                 before use), or an instance of VirtualPath or of a
                 subclass thereof

        The return value is a string. It is empty when 'other'
        designates the same path as 'self'. If the operation is
        impossible, i.e. 'other' is not 'self' or one of its ancestors,
        ValueError is raised.

        >>> VirtualPath('/etc/passwd').relativeTo('/')
        'etc/passwd'
        >>> VirtualPath('/etc/passwd').relativeTo('/etc')
        'passwd'
        >>> VirtualPath('/etc').relativeTo('/etc')
        ''

        """
        if isinstance(other, VirtualPath):
            normedOther = other._path
        else:
            normedOther = self.normalizeStringPath(other)

        if normedOther == '/':
            return self._path[1:]
        elif self._path == normedOther:
            # Same path: nothing follows 'other'. This is consistent with
            # what is returned when both paths are the root.
            return ''
        elif self._path.startswith(normedOther):
            rest = self._path[len(normedOther):]

            # Only accept matches that end on a component boundary:
            # '/etcetera' is not inside '/etc'.
            if rest.startswith('/'):
                return rest[1:]

        raise ValueError("{!r} does not start with '{}'".format(self, other))

    def withName(self, newName):
        """Return a new VirtualPath instance with the 'name' part changed.

        If the original path is '/' (which doesn't have a name in the
        sense of the 'name' property), ValueError is raised. ValueError
        is also raised if 'newName' starts or ends with a '/'.

        Note: the return value is always a plain VirtualPath, even when
              this method is called on an instance of a subclass.

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withName('setup.py')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/setup.py')

        """
        if self._path == '/':
            raise ValueError("{!r} has an empty name".format(self))
        else:
            pos = self._path.rfind('/')
            assert pos != -1, (pos, self._path)

            if newName.startswith('/'):
                raise ValueError("{!r} starts with a '/'".format(newName))
            elif newName.endswith('/'):
                raise ValueError("{!r} ends with a '/'".format(newName))
            else:
                return VirtualPath(self._path[:pos]) / newName

    def withSuffix(self, newSuffix):
        """Return a new VirtualPath instance with the suffix changed.

        If the original path doesn't have a suffix, the new suffix is
        appended:

        >>> p = VirtualPath('/foobar/downloads/pathlib.tar.gz')
        >>> p.withSuffix('.bz2')
        terrasync.virtual_path.VirtualPath('/foobar/downloads/pathlib.tar.bz2')
        >>> p = VirtualPath('/foobar/README')
        >>> p.withSuffix('.txt')
        terrasync.virtual_path.VirtualPath('/foobar/README.txt')

        If 'self' is the root virtual path ('/') or 'newSuffix' doesn't
        start with '.', ValueError is raised.

        """
        if not newSuffix.startswith('.'):
            raise ValueError("new suffix {!r} doesn't start with '.'"
                             .format(newSuffix))

        name = self.name
        if not name:
            raise ValueError("{!r} has an empty 'name' part".format(self))

        pos = name.rfind('.')

        if pos == -1:
            return self.withName(name + newSuffix)       # append suffix
        else:
            return self.withName(name[:pos] + newSuffix) # replace suffix
|
||
|
||
|
||
class MutableVirtualPath(VirtualPath):
    """VirtualPath variant whose path can be extended in place.

    Path components can be appended to an existing instance with the
    /= operator, which is not possible with VirtualPath. In exchange,
    instances are not hashable: they can be used neither as dictionary
    keys nor as elements of a set or frozenset.

    """

    # Disable hashing, since the path may change after construction
    __hash__ = None

    def _normalize(self):
        """Replace the stored path with its normalized form."""
        self._path = self.normalizeStringPath(self._path)

    def __itruediv__(self, s):
        """In-place path concatenation (the '/=' operator).

        's' is a string holding a relative path that uses the '/'
        separator, such as "dir/subdir/other-subdir". It must neither
        start nor end with a '/' (checked with an assertion).

        Return self, as required for an in-place operator.

        """
        # Sanity check of the current state; it could be dropped if it
        # turned out to be a real performance problem.
        self._check()
        assert not s.startswith('/') and not s.endswith('/'), repr(s)

        # The root path already ends with the separator
        separator = '' if self._path == '/' else '/'
        self._path = self._path + separator + s

        # Collapse repeated slashes, drop a trailing '/' (unless the whole
        # path is '/'), etc.
        self._normalize()

        return self
|
||
|
||
|
||
if __name__ == "__main__":
    # Running the module as a script executes its doctests. This works, but
    # the unittest framework gives full test coverage and is set up to run
    # all doctests from this module automatically.
    #
    # Hint: 'python3 -m unittest discover' from the TerraSync directory
    #       should do the trick.
    from doctest import testmod
    testmod()
|