| | |
| | |
| | |
| | |
| | import binascii |
| | import os |
| | import mmap |
| | import sys |
| | import time |
| | import errno |
| |
|
| | from io import BytesIO |
| |
|
| | from smmap import ( |
| | StaticWindowMapManager, |
| | SlidingWindowMapManager, |
| | SlidingWindowMapBuffer |
| | ) |
| |
|
| | |
| | |
# Module-level memory-map manager instance; ``sliding_ro_buffer`` obtains its
# cursors from it.
mman = SlidingWindowMapManager()
| | |
| |
|
| | import hashlib |
| |
|
# Prefer the interpreter's own ``struct.unpack_from``; when it cannot be
# imported, build an equivalent from ``unpack`` and ``calcsize``.
try:
    from struct import unpack_from
except ImportError:
    from struct import unpack, calcsize

    # format string -> packed size, so ``calcsize`` runs once per format
    __calcsize_cache = {}

    def unpack_from(fmt, data, offset=0):
        """Unpack ``fmt`` from ``data``, starting at byte ``offset``.

        :param fmt: struct format string
        :param data: sliceable buffer holding the packed bytes
        :param offset: index of the first byte to decode
        :return: tuple of values as returned by ``struct.unpack``"""
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = __calcsize_cache[fmt] = calcsize(fmt)

        end = offset + size
        return unpack(fmt, data[offset:end])
| | |
| |
|
| |
|
| | |
| |
|
# --- Aliases -----------------------------------------------------------------

# errors
ENOENT = errno.ENOENT

# hex <-> binary conversion
bin_to_hex = binascii.b2a_hex
hex_to_bin = binascii.a2b_hex

# os.path shortcuts
basename = os.path.basename
dirname = os.path.dirname
exists = os.path.exists
isdir = os.path.isdir
isfile = os.path.isfile
join = os.path.join

# os shortcuts: file system entries
chmod = os.chmod
mkdir = os.mkdir
rename = os.rename

# os shortcuts: file descriptor operations
close = os.close
fsync = os.fsync
read = os.read
write = os.write
| |
|
| |
|
| | def _retry(func, *args, **kwargs): |
| | |
| | |
| | if sys.platform == "win32": |
| | for _ in range(10): |
| | try: |
| | return func(*args, **kwargs) |
| | except Exception: |
| | time.sleep(0.1) |
| | return func(*args, **kwargs) |
| | else: |
| | return func(*args, **kwargs) |
| |
|
| |
|
def remove(*args, **kwargs):
    """Delete a file by calling ``os.remove`` through ``_retry``.

    All positional and keyword arguments are forwarded to ``os.remove``
    unchanged.

    :return: the result of the ``os.remove`` call"""
    return _retry(os.remove, *args, **kwargs)
| |
|
| |
|
| | |
| | from gitdb.const import ( |
| | NULL_BIN_SHA, |
| | NULL_HEX_SHA |
| | ) |
| |
|
| | |
| |
|
| | |
| |
|
| |
|
| | class _RandomAccessBytesIO: |
| |
|
| | """Wrapper to provide required functionality in case memory maps cannot or may |
| | not be used. This is only really required in python 2.4""" |
| | __slots__ = '_sio' |
| |
|
| | def __init__(self, buf=''): |
| | self._sio = BytesIO(buf) |
| |
|
| | def __getattr__(self, attr): |
| | return getattr(self._sio, attr) |
| |
|
| | def __len__(self): |
| | return len(self.getvalue()) |
| |
|
| | def __getitem__(self, i): |
| | return self.getvalue()[i] |
| |
|
| | def __getslice__(self, start, end): |
| | return self.getvalue()[start:end] |
| |
|
| |
|
def byte_ord(b):
    """
    Return the integer representation of the byte string. This supports Python
    3 byte arrays as well as standard strings.

    :param b: a single character/byte, or a value that already is an integer
    :return: ``ord(b)`` when ``ord`` accepts the argument, otherwise ``b`` itself
    """
    try:
        code = ord(b)
    except TypeError:
        # ord() rejected the argument (e.g. an int obtained by indexing bytes);
        # hand it back untouched
        return b
    else:
        return code
| |
|
| | |
| |
|
| | |
| |
|
| |
|
def make_sha(source=b''):
    """Create a SHA-1 hash object.

    :param source: bytes fed into the hash right away
    :return: a ``hashlib`` SHA-1 object, to be used through ``update``,
        ``digest`` and ``hexdigest``"""
    # hashlib is imported unconditionally by this module, so no fallback to
    # the Python 2 ``sha`` module is needed.
    return hashlib.sha1(source)
| |
|
| |
|
def allocate_memory(size):
    """Provide a memory block of ``size`` bytes that can be used like a file.

    :param size: number of bytes to allocate
    :return: an anonymous memory map of ``size`` bytes; for a size of 0, or
        when creating the map raises ``OSError``, a ``_RandomAccessBytesIO``
        is returned instead (zero-filled to ``size`` bytes in the latter case)"""
    if size != 0:
        try:
            return mmap.mmap(-1, size)
        except OSError:
            # mapping failed: fall back to ordinary process memory
            return _RandomAccessBytesIO(b"\0" * size)

    return _RandomAccessBytesIO(b'')
| | |
| |
|
| |
|
def file_contents_ro(fd, stream=False, allow_mmap=True):
    """Obtain the contents of an open file for read-only use.

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, its allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False
    :return: a read-only memory map when mapping is allowed and succeeds; otherwise
        the bytes read from ``fd``, wrapped in a ``_RandomAccessBytesIO`` if
        ``stream`` is True"""
    if allow_mmap:
        try:
            try:
                # length 0 asks for the whole file
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # second try, this time with the size spelled out
                size = os.fstat(fd).st_size
                return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
        except OSError:
            # mapping is not possible - read the data the plain way below
            pass

    size = os.fstat(fd).st_size
    data = os.read(fd, size)
    return _RandomAccessBytesIO(data) if stream else data
| |
|
| |
|
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Open ``filepath`` read-only and return its contents via ``file_contents_ro``.

    The descriptor opened here is closed again before returning, whether or not
    obtaining the contents succeeded.

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    # O_BINARY is not available on every platform; contribute nothing where it is missing
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        contents = file_contents_ro(fd, stream, allow_mmap)
    finally:
        close(fd)
    return contents
| | |
| |
|
| |
|
def sliding_ro_buffer(filepath, flags=0):
    """
    Create a buffer for ``filepath`` on top of the module's memory-map manager.

    :param filepath: path handed to the manager's ``make_cursor``
    :param flags: passed on to ``SlidingWindowMapBuffer`` as ``flags``
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)
| |
|
| |
|
def to_hex_sha(sha):
    """:return: hexified version of sha; a value of length 40 is returned
        unchanged, anything else is converted with ``bin_to_hex``"""
    return sha if len(sha) == 40 else bin_to_hex(sha)
| |
|
| |
|
def to_bin_sha(sha):
    """:return: binary version of sha; a value of length 20 is returned
        unchanged, anything else is converted with ``hex_to_bin``"""
    return sha if len(sha) == 20 else hex_to_bin(sha)
| |
|
| |
|
| | |
| |
|
| |
|
| | |
| |
|
class LazyMixin:

    """
    Mixin that fills in attribute values lazily, the first time they are read.

    An attribute that cannot be found the regular way is handed to
    ``_set_cache_``, which is expected to compute and store it; afterwards it
    is read like any other attribute of the instance, from its dict or slot,
    without going through this mechanism again.
    """

    __slots__ = ()

    def __getattr__(self, attr):
        """
        Called only for attributes regular lookup did not find: have
        ``_set_cache_`` create the value, then read it back.

        :raise AttributeError: if ``_set_cache_`` did not set ``attr``"""
        self._set_cache_(attr)
        # bypass __getattr__ for the read-back so a missing value raises
        # instead of triggering another round of caching
        return object.__getattribute__(self, attr)

    def _set_cache_(self, attr):
        """
        Hook for derived classes, which should override it.

        An implementation checks whether the attribute named ``attr`` is one it
        can produce and, if so, sets it on ``self``. For attributes it does not
        know it should do nothing, or delegate to its own base class.

        It is free to set further attributes in the same call, for example if
        producing the requested value yields more information than that single
        attribute represents."""
        pass
| |
|
| |
|
class LockedFD:

    """
    This class facilitates a safe read and write operation to a file on disk.
    If we write to 'file', we obtain a lock file at 'file.lock' and write to
    that instead. If we succeed, the lock file will be renamed to overwrite
    the original file.

    When reading, we obtain a lock file as well, but only to prevent writers
    from succeeding while we are reading the file.

    An instance that still holds its file descriptor when it is destroyed
    performs a rollback.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        # descriptor handed out by open(); None while nothing is open
        self._fd = None
        # None until open() was called, afterwards the ``write`` flag passed to it
        self._write = None

    def __del__(self):
        # a descriptor still held here means neither commit() nor rollback() ran
        if self._fd is not None:
            self.rollback()

    def _lockfilepath(self):
        """:return: path of the lock file belonging to our file"""
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing.
            Otherwise it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise AssertionError: if called more than once
        :raise OSError: if the lock could not be obtained, or if the actual file
            could not be opened for reading

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # O_EXCL makes creation fail if the lock file exists already, which is
        # what turns the file into a lock
        binary = getattr(os, 'O_BINARY', 0)
        lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        try:
            fd = os.open(self._lockfilepath(), lockmode, 0o600)
            if not write:
                # readers only need the lock file to exist
                os.close(fd)
            else:
                # writers write into the lock file itself
                self._fd = fd
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % self._lockfilepath()) from e

        # reading: open the actual file now that the lock is held
        if self._fd is None:
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except BaseException:
                # release the lock again before passing the error on
                remove(self._lockfilepath())
                raise

        if stream:
            # imported here rather than at module level
            from gitdb.stream import FDStream
            return FDStream(self._fd)
        else:
            return self._fd

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Handle the lock according to the write mode

        :param successful: if True and we were opened for writing, the lock file
            replaces the actual file; in every other case the lock file is removed
        :raise AssertionError: if ``open`` was never called"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        # already finished by an earlier commit() or rollback()
        if self._fd is None:
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if self._write and successful:
            # os.replace overwrites an existing destination on every platform,
            # so the target never has to be deleted first and never goes
            # missing in between. _retry repeats the call on Windows if it fails.
            _retry(os.replace, lockfile, self._filepath)

            # the lock file was created with mode 0o600 - let others read the result
            chmod(self._filepath, 0o644)
        else:
            # reading, or a failed write: just drop the lock file
            remove(lockfile)
| | |
| |
|
| | |
| |
|