From 096810b31b37fea62556c99623d75568638f8d27 Mon Sep 17 00:00:00 2001 From: Ariel Eizenberg Date: Sun, 24 Dec 2023 16:24:03 +0200 Subject: [PATCH 1/5] Fix fork deadlock When a fork occurs while the sqlite db is mid-transaction, so sqlite transaction lock remains locked forever in the foree, deadlocking it. This patch protects against this case using os.register_at_fork(). --- diskcache/core.py | 4939 +++++++++++++++-------------- tests/test_fork_multithreading.py | 74 + 2 files changed, 2561 insertions(+), 2452 deletions(-) create mode 100644 tests/test_fork_multithreading.py diff --git a/diskcache/core.py b/diskcache/core.py index c7c8486..80911a7 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -1,2452 +1,2487 @@ -"""Core disk and file backed cache API. -""" - -import codecs -import contextlib as cl -import errno -import functools as ft -import io -import json -import os -import os.path as op -import pickle -import pickletools -import sqlite3 -import struct -import tempfile -import threading -import time -import warnings -import zlib - - -def full_name(func): - """Return full name of `func` by adding the module and function name.""" - return func.__module__ + '.' + func.__qualname__ - - -class Constant(tuple): - """Pretty display of immutable constant.""" - - def __new__(cls, name): - return tuple.__new__(cls, (name,)) - - def __repr__(self): - return '%s' % self[0] - - -DBNAME = 'cache.db' -ENOVAL = Constant('ENOVAL') -UNKNOWN = Constant('UNKNOWN') - -MODE_NONE = 0 -MODE_RAW = 1 -MODE_BINARY = 2 -MODE_TEXT = 3 -MODE_PICKLE = 4 - -DEFAULT_SETTINGS = { - 'statistics': 0, # False - 'tag_index': 0, # False - 'eviction_policy': 'least-recently-stored', - 'size_limit': 2**30, # 1gb - 'cull_limit': 10, - 'sqlite_auto_vacuum': 1, # FULL - 'sqlite_cache_size': 2**13, # 8,192 pages - 'sqlite_journal_mode': 'wal', - 'sqlite_mmap_size': 2**26, # 64mb - 'sqlite_synchronous': 1, # NORMAL - 'disk_min_file_size': 2**15, # 32kb - 'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, -} - -METADATA = { - 'count': 0, - 'size': 0, - 'hits': 0, - 'misses': 0, -} - -EVICTION_POLICY = { - 'none': { - 'init': None, - 'get': None, - 'cull': None, - }, - 'least-recently-stored': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_store_time ON' - ' Cache (store_time)' - ), - 'get': None, - 'cull': 'SELECT {fields} FROM Cache ORDER BY store_time LIMIT ?', - }, - 'least-recently-used': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_access_time ON' - ' Cache (access_time)' - ), - 'get': 'access_time = {now}', - 'cull': 'SELECT {fields} FROM Cache ORDER BY access_time LIMIT ?', - }, - 'least-frequently-used': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_access_count ON' - ' Cache (access_count)' - ), - 'get': 'access_count = access_count + 1', - 'cull': 'SELECT {fields} FROM Cache ORDER BY access_count LIMIT ?', - }, -} - - -class Disk: - """Cache key and value serialization for SQLite database and files.""" - - def __init__(self, directory, min_file_size=0, pickle_protocol=0): - """Initialize disk instance. - - :param str directory: directory path - :param int min_file_size: minimum size for file use - :param int pickle_protocol: pickle protocol for serialization - - """ - self._directory = directory - self.min_file_size = min_file_size - self.pickle_protocol = pickle_protocol - - def hash(self, key): - """Compute portable hash for `key`. 
- - :param key: key to hash - :return: hash value - - """ - mask = 0xFFFFFFFF - disk_key, _ = self.put(key) - type_disk_key = type(disk_key) - - if type_disk_key is sqlite3.Binary: - return zlib.adler32(disk_key) & mask - elif type_disk_key is str: - return zlib.adler32(disk_key.encode('utf-8')) & mask # noqa - elif type_disk_key is int: - return disk_key % mask - else: - assert type_disk_key is float - return zlib.adler32(struct.pack('!d', disk_key)) & mask - - def put(self, key): - """Convert `key` to fields key and raw for Cache table. - - :param key: key to convert - :return: (database key, raw boolean) pair - - """ - # pylint: disable=unidiomatic-typecheck - type_key = type(key) - - if type_key is bytes: - return sqlite3.Binary(key), True - elif ( - (type_key is str) - or ( - type_key is int - and -9223372036854775808 <= key <= 9223372036854775807 - ) - or (type_key is float) - ): - return key, True - else: - data = pickle.dumps(key, protocol=self.pickle_protocol) - result = pickletools.optimize(data) - return sqlite3.Binary(result), False - - def get(self, key, raw): - """Convert fields `key` and `raw` from Cache table to key. - - :param key: database key to convert - :param bool raw: flag indicating raw database storage - :return: corresponding Python key - - """ - # pylint: disable=unidiomatic-typecheck - if raw: - return bytes(key) if type(key) is sqlite3.Binary else key - else: - return pickle.load(io.BytesIO(key)) - - def store(self, value, read, key=UNKNOWN): - """Convert `value` to fields size, mode, filename, and value for Cache - table. - - :param value: value to convert - :param bool read: True when value is file-like object - :param key: key for item (default UNKNOWN) - :return: (size, mode, filename, value) tuple for Cache table - - """ - # pylint: disable=unidiomatic-typecheck - type_value = type(value) - min_file_size = self.min_file_size - - if ( - (type_value is str and len(value) < min_file_size) - or ( - type_value is int - and -9223372036854775808 <= value <= 9223372036854775807 - ) - or (type_value is float) - ): - return 0, MODE_RAW, None, value - elif type_value is bytes: - if len(value) < min_file_size: - return 0, MODE_RAW, None, sqlite3.Binary(value) - else: - filename, full_path = self.filename(key, value) - self._write(full_path, io.BytesIO(value), 'xb') - return len(value), MODE_BINARY, filename, None - elif type_value is str: - filename, full_path = self.filename(key, value) - self._write(full_path, io.StringIO(value), 'x', 'UTF-8') - size = op.getsize(full_path) - return size, MODE_TEXT, filename, None - elif read: - reader = ft.partial(value.read, 2**22) - filename, full_path = self.filename(key, value) - iterator = iter(reader, b'') - size = self._write(full_path, iterator, 'xb') - return size, MODE_BINARY, filename, None - else: - result = pickle.dumps(value, protocol=self.pickle_protocol) - - if len(result) < min_file_size: - return 0, MODE_PICKLE, None, sqlite3.Binary(result) - else: - filename, full_path = self.filename(key, value) - self._write(full_path, io.BytesIO(result), 'xb') - return len(result), MODE_PICKLE, filename, None - - def _write(self, full_path, iterator, mode, encoding=None): - full_dir, _ = op.split(full_path) - - for count in range(1, 11): - with cl.suppress(OSError): - os.makedirs(full_dir) - - try: - # Another cache may have deleted the directory before - # the file could be opened. - writer = open(full_path, mode, encoding=encoding) - except OSError: - if count == 10: - # Give up after 10 tries to open the file. 
- raise - continue - - with writer: - size = 0 - for chunk in iterator: - size += len(chunk) - writer.write(chunk) - return size - - def fetch(self, mode, filename, value, read): - """Convert fields `mode`, `filename`, and `value` from Cache table to - value. - - :param int mode: value mode raw, binary, text, or pickle - :param str filename: filename of corresponding value - :param value: database value - :param bool read: when True, return an open file handle - :return: corresponding Python value - :raises: IOError if the value cannot be read - - """ - # pylint: disable=unidiomatic-typecheck,consider-using-with - if mode == MODE_RAW: - return bytes(value) if type(value) is sqlite3.Binary else value - elif mode == MODE_BINARY: - if read: - return open(op.join(self._directory, filename), 'rb') - else: - with open(op.join(self._directory, filename), 'rb') as reader: - return reader.read() - elif mode == MODE_TEXT: - full_path = op.join(self._directory, filename) - with open(full_path, 'r', encoding='UTF-8') as reader: - return reader.read() - elif mode == MODE_PICKLE: - if value is None: - with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) - else: - return pickle.load(io.BytesIO(value)) - - def filename(self, key=UNKNOWN, value=UNKNOWN): - """Return filename and full-path tuple for file storage. - - Filename will be a randomly generated 28 character hexadecimal string - with ".val" suffixed. Two levels of sub-directories will be used to - reduce the size of directories. On older filesystems, lookups in - directories with many files may be slow. - - The default implementation ignores the `key` and `value` parameters. - - In some scenarios, for example :meth:`Cache.push - `, the `key` or `value` may not be known when the - item is stored in the cache. - - :param key: key for item (default UNKNOWN) - :param value: value for item (default UNKNOWN) - - """ - # pylint: disable=unused-argument - hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') - sub_dir = op.join(hex_name[:2], hex_name[2:4]) - name = hex_name[4:] + '.val' - filename = op.join(sub_dir, name) - full_path = op.join(self._directory, filename) - return filename, full_path - - def remove(self, file_path): - """Remove a file given by `file_path`. - - This method is cross-thread and cross-process safe. If an OSError - occurs, it is suppressed. - - :param str file_path: relative path to file - - """ - full_path = op.join(self._directory, file_path) - full_dir, _ = op.split(full_path) - - # Suppress OSError that may occur if two caches attempt to delete the - # same file or directory at the same time. - - with cl.suppress(OSError): - os.remove(full_path) - - with cl.suppress(OSError): - os.removedirs(full_dir) - - -class JSONDisk(Disk): - """Cache key and value using JSON serialization with zlib compression.""" - - def __init__(self, directory, compress_level=1, **kwargs): - """Initialize JSON disk instance. - - Keys and values are compressed using the zlib library. The - `compress_level` is an integer from 0 to 9 controlling the level of - compression; 1 is fastest and produces the least compression, 9 is - slowest and produces the most compression, and 0 is no compression. 
- - :param str directory: directory path - :param int compress_level: zlib compression level (default 1) - :param kwargs: super class arguments - - """ - self.compress_level = compress_level - super().__init__(directory, **kwargs) - - def put(self, key): - json_bytes = json.dumps(key).encode('utf-8') - data = zlib.compress(json_bytes, self.compress_level) - return super().put(data) - - def get(self, key, raw): - data = super().get(key, raw) - return json.loads(zlib.decompress(data).decode('utf-8')) - - def store(self, value, read, key=UNKNOWN): - if not read: - json_bytes = json.dumps(value).encode('utf-8') - value = zlib.compress(json_bytes, self.compress_level) - return super().store(value, read, key=key) - - def fetch(self, mode, filename, value, read): - data = super().fetch(mode, filename, value, read) - if not read: - data = json.loads(zlib.decompress(data).decode('utf-8')) - return data - - -class Timeout(Exception): - """Database timeout expired.""" - - -class UnknownFileWarning(UserWarning): - """Warning used by Cache.check for unknown files.""" - - -class EmptyDirWarning(UserWarning): - """Warning used by Cache.check for empty directories.""" - - -def args_to_key(base, args, kwargs, typed, ignore): - """Create cache key out of function arguments. - - :param tuple base: base of key - :param tuple args: function arguments - :param dict kwargs: function keyword arguments - :param bool typed: include types in cache key - :param set ignore: positional or keyword args to ignore - :return: cache key tuple - - """ - args = tuple(arg for index, arg in enumerate(args) if index not in ignore) - key = base + args + (None,) - - if kwargs: - kwargs = {key: val for key, val in kwargs.items() if key not in ignore} - sorted_items = sorted(kwargs.items()) - - for item in sorted_items: - key += item - - if typed: - key += tuple(type(arg) for arg in args) - - if kwargs: - key += tuple(type(value) for _, value in sorted_items) - - return key - - -class Cache: - """Disk and file backed cache.""" - - def __init__(self, directory=None, timeout=60, disk=Disk, **settings): - """Initialize cache instance. - - :param str directory: cache directory - :param float timeout: SQLite connection timeout - :param disk: Disk type or subclass for serialization - :param settings: any of DEFAULT_SETTINGS - - """ - try: - assert issubclass(disk, Disk) - except (TypeError, AssertionError): - raise ValueError('disk must subclass diskcache.Disk') from None - - if directory is None: - directory = tempfile.mkdtemp(prefix='diskcache-') - directory = str(directory) - directory = op.expanduser(directory) - directory = op.expandvars(directory) - - self._directory = directory - self._timeout = 0 # Manually handle retries during initialization. - self._local = threading.local() - self._txn_id = None - - if not op.isdir(directory): - try: - os.makedirs(directory, 0o755) - except OSError as error: - if error.errno != errno.EEXIST: - raise EnvironmentError( - error.errno, - 'Cache directory "%s" does not exist' - ' and could not be created' % self._directory, - ) from None - - sql = self._sql_retry - - # Setup Settings table. - - try: - current_settings = dict( - sql('SELECT key, value FROM Settings').fetchall() - ) - except sqlite3.OperationalError: - current_settings = {} - - sets = DEFAULT_SETTINGS.copy() - sets.update(current_settings) - sets.update(settings) - - for key in METADATA: - sets.pop(key, None) - - # Chance to set pragmas before any tables are created. 
- - for key, value in sorted(sets.items()): - if key.startswith('sqlite_'): - self.reset(key, value, update=False) - - sql( - 'CREATE TABLE IF NOT EXISTS Settings (' - ' key TEXT NOT NULL UNIQUE,' - ' value)' - ) - - # Setup Disk object (must happen after settings initialized). - - kwargs = { - key[5:]: value - for key, value in sets.items() - if key.startswith('disk_') - } - self._disk = disk(directory, **kwargs) - - # Set cached attributes: updates settings and sets pragmas. - - for key, value in sets.items(): - query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key, value) - - for key, value in METADATA.items(): - query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key) - - ((self._page_size,),) = sql('PRAGMA page_size').fetchall() - - # Setup Cache table. - - sql( - 'CREATE TABLE IF NOT EXISTS Cache (' - ' rowid INTEGER PRIMARY KEY,' - ' key BLOB,' - ' raw INTEGER,' - ' store_time REAL,' - ' expire_time REAL,' - ' access_time REAL,' - ' access_count INTEGER DEFAULT 0,' - ' tag BLOB,' - ' size INTEGER DEFAULT 0,' - ' mode INTEGER DEFAULT 0,' - ' filename TEXT,' - ' value BLOB)' - ) - - sql( - 'CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' - ' Cache(key, raw)' - ) - - sql( - 'CREATE INDEX IF NOT EXISTS Cache_expire_time ON' - ' Cache (expire_time)' - ) - - query = EVICTION_POLICY[self.eviction_policy]['init'] - - if query is not None: - sql(query) - - # Use triggers to keep Metadata updated. - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_count_insert' - ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value + 1' - ' WHERE key = "count"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_count_delete' - ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value - 1' - ' WHERE key = "count"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_insert' - ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value + NEW.size' - ' WHERE key = "size"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_update' - ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings' - ' SET value = value + NEW.size - OLD.size' - ' WHERE key = "size"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_delete' - ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value - OLD.size' - ' WHERE key = "size"; END' - ) - - # Create tag index if requested. - - if self.tag_index: # pylint: disable=no-member - self.create_tag_index() - else: - self.drop_tag_index() - - # Close and re-open database connection with given timeout. - - self.close() - self._timeout = timeout - self._sql # pylint: disable=pointless-statement - - @property - def directory(self): - """Cache directory.""" - return self._directory - - @property - def timeout(self): - """SQLite connection timeout value in seconds.""" - return self._timeout - - @property - def disk(self): - """Disk used for serialization.""" - return self._disk - - @property - def _con(self): - # Check process ID to support process forking. If the process - # ID changes, close the connection and update the process ID. 
- - local_pid = getattr(self._local, 'pid', None) - pid = os.getpid() - - if local_pid != pid: - self.close() - self._local.pid = pid - - con = getattr(self._local, 'con', None) - - if con is None: - con = self._local.con = sqlite3.connect( - op.join(self._directory, DBNAME), - timeout=self._timeout, - isolation_level=None, - ) - - # Some SQLite pragmas work on a per-connection basis so - # query the Settings table and reset the pragmas. The - # Settings table may not exist so catch and ignore the - # OperationalError that may occur. - - try: - select = 'SELECT key, value FROM Settings' - settings = con.execute(select).fetchall() - except sqlite3.OperationalError: - pass - else: - for key, value in settings: - if key.startswith('sqlite_'): - self.reset(key, value, update=False) - - return con - - @property - def _sql(self): - return self._con.execute - - @property - def _sql_retry(self): - sql = self._sql - - # 2018-11-01 GrantJ - Some SQLite builds/versions handle - # the SQLITE_BUSY return value and connection parameter - # "timeout" differently. For a more reliable duration, - # manually retry the statement for 60 seconds. Only used - # by statements which modify the database and do not use - # a transaction (like those in ``__init__`` or ``reset``). - # See Issue #85 for and tests/issue_85.py for more details. - - def _execute_with_retry(statement, *args, **kwargs): - start = time.time() - while True: - try: - return sql(statement, *args, **kwargs) - except sqlite3.OperationalError as exc: - if str(exc) != 'database is locked': - raise - diff = time.time() - start - if diff > 60: - raise - time.sleep(0.001) - - return _execute_with_retry - - @cl.contextmanager - def transact(self, retry=False): - """Context manager to perform a transaction by locking the cache. - - While the cache is locked, no other write operation is permitted. - Transactions should therefore be as short as possible. Read and write - operations performed in a transaction are atomic. Read operations may - occur concurrent to a transaction. - - Transactions may be nested and may not be shared between threads. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - >>> cache = Cache() - >>> with cache.transact(): # Atomically increment two keys. - ... _ = cache.incr('total', 123.4) - ... _ = cache.incr('count', 1) - >>> with cache.transact(): # Atomically calculate average. - ... 
average = cache['total'] / cache['count'] - >>> average - 123.4 - - :param bool retry: retry if database timeout occurs (default False) - :return: context manager for use in `with` statement - :raises Timeout: if database timeout occurs - - """ - with self._transact(retry=retry): - yield - - @cl.contextmanager - def _transact(self, retry=False, filename=None): - sql = self._sql - filenames = [] - _disk_remove = self._disk.remove - tid = threading.get_ident() - txn_id = self._txn_id - - if tid == txn_id: - begin = False - else: - while True: - try: - sql('BEGIN IMMEDIATE') - begin = True - self._txn_id = tid - break - except sqlite3.OperationalError: - if retry: - continue - if filename is not None: - _disk_remove(filename) - raise Timeout from None - - try: - yield sql, filenames.append - except BaseException: - if begin: - assert self._txn_id == tid - self._txn_id = None - sql('ROLLBACK') - raise - else: - if begin: - assert self._txn_id == tid - self._txn_id = None - sql('COMMIT') - for name in filenames: - if name is not None: - _disk_remove(name) - - def set(self, key, value, expire=None, read=False, tag=None, retry=False): - """Set `key` and `value` item in cache. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param value: value for item - :param float expire: seconds until item expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was set - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read, key=key) - columns = (expire_time, tag, size, mode, filename, db_value) - - # The order of SELECT, UPDATE, and INSERT is important below. - # - # Typical cache usage pattern is: - # - # value = cache.get(key) - # if value is None: - # value = expensive_calculation() - # cache.set(key, value) - # - # Cache.get does not evict expired keys to avoid writes during lookups. - # Commonly used/expired keys will therefore remain in the cache making - # an UPDATE the preferred path. - # - # The alternative is to assume the key is not present by first trying - # to INSERT and then handling the IntegrityError that occurs from - # violating the UNIQUE constraint. This optimistic approach was - # rejected based on the common cache usage pattern. - # - # INSERT OR REPLACE aka UPSERT is not used because the old filename may - # need cleanup. - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_filename),) = rows - cleanup(old_filename) - self._row_update(rowid, now, columns) - else: - self._row_insert(db_key, raw, now, columns) - - self._cull(now, sql, cleanup) - - return True - - def __setitem__(self, key, value): - """Set corresponding `value` for `key` in cache. 
- - :param key: key for item - :param value: value for item - :return: corresponding value - :raises KeyError: if key is not found - - """ - self.set(key, value, retry=True) - - def _row_update(self, rowid, now, columns): - sql = self._sql - expire_time, tag, size, mode, filename, value = columns - sql( - 'UPDATE Cache SET' - ' store_time = ?,' - ' expire_time = ?,' - ' access_time = ?,' - ' access_count = ?,' - ' tag = ?,' - ' size = ?,' - ' mode = ?,' - ' filename = ?,' - ' value = ?' - ' WHERE rowid = ?', - ( - now, # store_time - expire_time, - now, # access_time - 0, # access_count - tag, - size, - mode, - filename, - value, - rowid, - ), - ) - - def _row_insert(self, key, raw, now, columns): - sql = self._sql - expire_time, tag, size, mode, filename, value = columns - sql( - 'INSERT INTO Cache(' - ' key, raw, store_time, expire_time, access_time,' - ' access_count, tag, size, mode, filename, value' - ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', - ( - key, - raw, - now, # store_time - expire_time, - now, # access_time - 0, # access_count - tag, - size, - mode, - filename, - value, - ), - ) - - def _cull(self, now, sql, cleanup, limit=None): - cull_limit = self.cull_limit if limit is None else limit - - if cull_limit == 0: - return - - # Evict expired keys. - - select_expired_template = ( - 'SELECT %s FROM Cache' - ' WHERE expire_time IS NOT NULL AND expire_time < ?' - ' ORDER BY expire_time LIMIT ?' - ) - - select_expired = select_expired_template % 'filename' - rows = sql(select_expired, (now, cull_limit)).fetchall() - - if rows: - delete_expired = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( - select_expired_template % 'rowid' - ) - sql(delete_expired, (now, cull_limit)) - - for (filename,) in rows: - cleanup(filename) - - cull_limit -= len(rows) - - if cull_limit == 0: - return - - # Evict keys by policy. - - select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] - - if select_policy is None or self.volume() < self.size_limit: - return - - select_filename = select_policy.format(fields='filename', now=now) - rows = sql(select_filename, (cull_limit,)).fetchall() - - if rows: - delete = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( - select_policy.format(fields='rowid', now=now) - ) - sql(delete, (cull_limit,)) - - for (filename,) in rows: - cleanup(filename) - - def touch(self, key, expire=None, retry=False): - """Touch `key` in cache and update `expire` time. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param float expire: seconds until item expires - (default None, no expiry) - :param bool retry: retry if database timeout occurs (default False) - :return: True if key was touched - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - - with self._transact(retry) as (sql, _): - rows = sql( - 'SELECT rowid, expire_time FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_expire_time),) = rows - - if old_expire_time is None or old_expire_time > now: - sql( - 'UPDATE Cache SET expire_time = ? WHERE rowid = ?', - (expire_time, rowid), - ) - return True - - return False - - def add(self, key, value, expire=None, read=False, tag=None, retry=False): - """Add `key` and `value` item to cache. - - Similar to `set`, but only add to cache if key not present. - - Operation is atomic. 
Only one concurrent add operation for a given key - will succeed. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param value: value for item - :param float expire: seconds until the key expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was added - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read, key=key) - columns = (expire_time, tag, size, mode, filename, db_value) - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename, expire_time FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_filename, old_expire_time),) = rows - - if old_expire_time is None or old_expire_time > now: - cleanup(filename) - return False - - cleanup(old_filename) - self._row_update(rowid, now, columns) - else: - self._row_insert(db_key, raw, now, columns) - - self._cull(now, sql, cleanup) - - return True - - def incr(self, key, delta=1, default=0, retry=False): - """Increment value by delta for item with key. - - If key is missing and default is None then raise KeyError. Else if key - is missing and default is not None then use default for value. - - Operation is atomic. All concurrent increment operations will be - counted individually. - - Assumes value may be stored in a SQLite column. Most builds that target - machines with 64-bit pointer widths will support 64-bit signed - integers. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param int delta: amount to increment (default 1) - :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout occurs (default False) - :return: new value for item - :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid, expire_time, filename, value FROM Cache' - ' WHERE key = ? AND raw = ?' - ) - - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (db_key, raw)).fetchall() - - if not rows: - if default is None: - raise KeyError(key) - - value = default + delta - columns = (None, None) + self._disk.store( - value, False, key=key - ) - self._row_insert(db_key, raw, now, columns) - self._cull(now, sql, cleanup) - return value - - ((rowid, expire_time, filename, value),) = rows - - if expire_time is not None and expire_time < now: - if default is None: - raise KeyError(key) - - value = default + delta - columns = (None, None) + self._disk.store( - value, False, key=key - ) - self._row_update(rowid, now, columns) - self._cull(now, sql, cleanup) - cleanup(filename) - return value - - value += delta - - columns = 'store_time = ?, value = ?' - update_column = EVICTION_POLICY[self.eviction_policy]['get'] - - if update_column is not None: - columns += ', ' + update_column.format(now=now) - - update = 'UPDATE Cache SET %s WHERE rowid = ?' 
% columns - sql(update, (now, value, rowid)) - - return value - - def decr(self, key, delta=1, default=0, retry=False): - """Decrement value by delta for item with key. - - If key is missing and default is None then raise KeyError. Else if key - is missing and default is not None then use default for value. - - Operation is atomic. All concurrent decrement operations will be - counted individually. - - Unlike Memcached, negative values are supported. Value may be - decremented below zero. - - Assumes value may be stored in a SQLite column. Most builds that target - machines with 64-bit pointer widths will support 64-bit signed - integers. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param int delta: amount to decrement (default 1) - :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout occurs (default False) - :return: new value for item - :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout occurs - - """ - return self.incr(key, -delta, default, retry) - - def get( - self, - key, - default=None, - read=False, - expire_time=False, - tag=False, - retry=False, - ): - """Retrieve value from cache. If `key` is missing, return `default`. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param default: value to return if key is missing (default None) - :param bool read: if True, return file handle to value - (default False) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: value for item or default if key not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - update_column = EVICTION_POLICY[self.eviction_policy]['get'] - select = ( - 'SELECT rowid, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - if expire_time and tag: - default = (default, None, None) - elif expire_time or tag: - default = (default, None) - - if not self.statistics and update_column is None: - # Fast path, no transaction necessary. - - rows = self._sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - return default - - ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows - - try: - value = self._disk.fetch(mode, filename, db_value, read) - except IOError: - # Key was deleted before we could retrieve result. - return default - - else: # Slow path, transaction required. - cache_hit = ( - 'UPDATE Settings SET value = value + 1 WHERE key = "hits"' - ) - cache_miss = ( - 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' - ) - - with self._transact(retry) as (sql, _): - rows = sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - if self.statistics: - sql(cache_miss) - return default - - ( - (rowid, db_expire_time, db_tag, mode, filename, db_value), - ) = rows # noqa: E127 - - try: - value = self._disk.fetch(mode, filename, db_value, read) - except IOError: - # Key was deleted before we could retrieve result. - if self.statistics: - sql(cache_miss) - return default - - if self.statistics: - sql(cache_hit) - - now = time.time() - update = 'UPDATE Cache SET %s WHERE rowid = ?' 
- - if update_column is not None: - sql(update % update_column.format(now=now), (rowid,)) - - if expire_time and tag: - return (value, db_expire_time, db_tag) - elif expire_time: - return (value, db_expire_time) - elif tag: - return (value, db_tag) - else: - return value - - def __getitem__(self, key): - """Return corresponding value for `key` from cache. - - :param key: key matching item - :return: corresponding value - :raises KeyError: if key is not found - - """ - value = self.get(key, default=ENOVAL, retry=True) - if value is ENOVAL: - raise KeyError(key) - return value - - def read(self, key, retry=False): - """Return file handle value corresponding to `key` from cache. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default False) - :return: file open for reading in binary mode - :raises KeyError: if key is not found - :raises Timeout: if database timeout occurs - - """ - handle = self.get(key, default=ENOVAL, read=True, retry=retry) - if handle is ENOVAL: - raise KeyError(key) - return handle - - def __contains__(self, key): - """Return `True` if `key` matching item is found in cache. - - :param key: key matching item - :return: True if key matching item - - """ - sql = self._sql - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid FROM Cache' - ' WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - rows = sql(select, (db_key, raw, time.time())).fetchall() - - return bool(rows) - - def pop( - self, key, default=None, expire_time=False, tag=False, retry=False - ): # noqa: E501 - """Remove corresponding item for `key` from cache and return value. - - If `key` is missing, return `default`. - - Operation is atomic. Concurrent operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param default: value to return if key is missing (default None) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: value for item or default if key not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - with self._transact(retry) as (sql, _): - rows = sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - return default - - ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows - - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - try: - value = self._disk.fetch(mode, filename, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - return default - finally: - if filename is not None: - self._disk.remove(filename) - - if expire_time and tag: - return value, db_expire_time, db_tag - elif expire_time: - return value, db_expire_time - elif tag: - return value, db_tag - else: - return value - - def __delitem__(self, key, retry=True): - """Delete corresponding item for `key` from cache. 
- - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default `True`). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default True) - :raises KeyError: if key is not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - - with self._transact(retry) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename FROM Cache' - ' WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)', - (db_key, raw, time.time()), - ).fetchall() - - if not rows: - raise KeyError(key) - - ((rowid, filename),) = rows - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(filename) - - return True - - def delete(self, key, retry=False): - """Delete corresponding item for `key` from cache. - - Missing keys are ignored. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was deleted - :raises Timeout: if database timeout occurs - - """ - # pylint: disable=unnecessary-dunder-call - try: - return self.__delitem__(key, retry=retry) - except KeyError: - return False - - def push( - self, - value, - prefix=None, - side='back', - expire=None, - read=False, - tag=None, - retry=False, - ): - """Push `value` onto `side` of queue identified by `prefix` in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - Defaults to pushing value on back of queue. Set side to 'front' to push - value on front of queue. Side must be one of 'back' or 'front'. - - Operation is atomic. Concurrent operations will be serialized. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.pull`. - - >>> cache = Cache() - >>> print(cache.push('first value')) - 500000000000000 - >>> cache.get(500000000000000) - 'first value' - >>> print(cache.push('second value')) - 500000000000001 - >>> print(cache.push('third value', side='front')) - 499999999999999 - >>> cache.push(1234, prefix='userids') - 'userids-500000000000000' - - :param value: value for item - :param str prefix: key prefix (default None, key is integer) - :param str side: either 'back' or 'front' (default 'back') - :param float expire: seconds until the key expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: key for item in cache - :raises Timeout: if database timeout occurs - - """ - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - now = time.time() - raw = True - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read) - columns = (expire_time, tag, size, mode, filename, db_value) - order = {'back': 'DESC', 'front': 'ASC'} - select = ( - 'SELECT key FROM Cache' - ' WHERE ? < key AND key < ? AND raw = ?' 
- ' ORDER BY key %s LIMIT 1' - ) % order[side] - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql(select, (min_key, max_key, raw)).fetchall() - - if rows: - ((key,),) = rows - - if prefix is not None: - num = int(key[(key.rfind('-') + 1) :]) - else: - num = key - - if side == 'back': - num += 1 - else: - assert side == 'front' - num -= 1 - else: - num = 500000000000000 - - if prefix is not None: - db_key = '{0}-{1:015d}'.format(prefix, num) - else: - db_key = num - - self._row_insert(db_key, raw, now, columns) - self._cull(now, sql, cleanup) - - return db_key - - def pull( - self, - prefix=None, - default=(None, None), - side='front', - expire_time=False, - tag=False, - retry=False, - ): - """Pull key and value item pair from `side` of queue in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - If queue is empty, return default. - - Defaults to pulling key and value item pairs from front of queue. Set - side to 'back' to pull from back of queue. Side must be one of 'front' - or 'back'. - - Operation is atomic. Concurrent operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.push` and `Cache.get`. - - >>> cache = Cache() - >>> cache.pull() - (None, None) - >>> for letter in 'abc': - ... print(cache.push(letter)) - 500000000000000 - 500000000000001 - 500000000000002 - >>> key, value = cache.pull() - >>> print(key) - 500000000000000 - >>> value - 'a' - >>> _, value = cache.pull(side='back') - >>> value - 'c' - >>> cache.push(1234, 'userids') - 'userids-500000000000000' - >>> _, value = cache.pull('userids') - >>> value - 1234 - - :param str prefix: key prefix (default None, key is integer) - :param default: value to return if key is missing - (default (None, None)) - :param str side: either 'front' or 'back' (default 'front') - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair or default if queue is empty - :raises Timeout: if database timeout occurs - - """ - # Caution: Nearly identical code exists in Cache.peek - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - order = {'front': 'ASC', 'back': 'DESC'} - select = ( - 'SELECT rowid, key, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' - ' ORDER BY key %s LIMIT 1' - ) % order[side] - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (min_key, max_key)).fetchall() - - if not rows: - return default - - ( - (rowid, key, db_expire, db_tag, mode, name, db_value), - ) = rows - - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - if db_expire is not None and db_expire < time.time(): - cleanup(name) - else: - break - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. 
- continue - finally: - if name is not None: - self._disk.remove(name) - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def peek( - self, - prefix=None, - default=(None, None), - side='front', - expire_time=False, - tag=False, - retry=False, - ): - """Peek at key and value item pair from `side` of queue in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - If queue is empty, return default. - - Defaults to peeking at key and value item pairs from front of queue. - Set side to 'back' to pull from back of queue. Side must be one of - 'front' or 'back'. - - Expired items are deleted from cache. Operation is atomic. Concurrent - operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.pull` and `Cache.push`. - - >>> cache = Cache() - >>> for letter in 'abc': - ... print(cache.push(letter)) - 500000000000000 - 500000000000001 - 500000000000002 - >>> key, value = cache.peek() - >>> print(key) - 500000000000000 - >>> value - 'a' - >>> key, value = cache.peek(side='back') - >>> print(key) - 500000000000002 - >>> value - 'c' - - :param str prefix: key prefix (default None, key is integer) - :param default: value to return if key is missing - (default (None, None)) - :param str side: either 'front' or 'back' (default 'front') - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair or default if queue is empty - :raises Timeout: if database timeout occurs - - """ - # Caution: Nearly identical code exists in Cache.pull - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - order = {'front': 'ASC', 'back': 'DESC'} - select = ( - 'SELECT rowid, key, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' - ' ORDER BY key %s LIMIT 1' - ) % order[side] - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (min_key, max_key)).fetchall() - - if not rows: - return default - - ( - (rowid, key, db_expire, db_tag, mode, name, db_value), - ) = rows - - if db_expire is not None and db_expire < time.time(): - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(name) - else: - break - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - continue - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def peekitem(self, last=True, expire_time=False, tag=False, retry=False): - """Peek at key and value item pair in cache based on iteration order. - - Expired items are deleted from cache. Operation is atomic. Concurrent - operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). 
- - >>> cache = Cache() - >>> for num, letter in enumerate('abc'): - ... cache[letter] = num - >>> cache.peekitem() - ('c', 2) - >>> cache.peekitem(last=False) - ('a', 0) - - :param bool last: last item in iteration order (default True) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair - :raises KeyError: if cache is empty - :raises Timeout: if database timeout occurs - - """ - order = ('ASC', 'DESC') - select = ( - 'SELECT rowid, key, raw, expire_time, tag, mode, filename, value' - ' FROM Cache ORDER BY rowid %s LIMIT 1' - ) % order[last] - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select).fetchall() - - if not rows: - raise KeyError('dictionary is empty') - - ( - ( - rowid, - db_key, - raw, - db_expire, - db_tag, - mode, - name, - db_value, - ), - ) = rows - - if db_expire is not None and db_expire < time.time(): - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(name) - else: - break - - key = self._disk.get(db_key, raw) - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - continue - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def memoize( - self, name=None, typed=False, expire=None, tag=None, ignore=() - ): - """Memoizing cache decorator. - - Decorator to wrap callable with memoizing function using cache. - Repeated calls with the same arguments will lookup result in cache and - avoid function evaluation. - - If name is set to None (default), the callable name will be determined - automatically. - - When expire is set to zero, function results will not be set in the - cache. Cache lookups still occur, however. Read - :doc:`case-study-landing-page-caching` for example usage. - - If typed is set to True, function arguments of different types will be - cached separately. For example, f(3) and f(3.0) will be treated as - distinct calls with distinct results. - - The original underlying function is accessible through the __wrapped__ - attribute. This is useful for introspection, for bypassing the cache, - or for rewrapping the function with a different cache. - - >>> from diskcache import Cache - >>> cache = Cache() - >>> @cache.memoize(expire=1, tag='fib') - ... def fibonacci(number): - ... if number == 0: - ... return 0 - ... elif number == 1: - ... return 1 - ... else: - ... return fibonacci(number - 1) + fibonacci(number - 2) - >>> print(fibonacci(100)) - 354224848179261915075 - - An additional `__cache_key__` attribute can be used to generate the - cache key used for the given arguments. - - >>> key = fibonacci.__cache_key__(100) - >>> print(cache[key]) - 354224848179261915075 - - Remember to call memoize when decorating a callable. If you forget, - then a TypeError will occur. Note the lack of parenthenses after - memoize below: - - >>> @cache.memoize - ... def test(): - ... pass - Traceback (most recent call last): - ... 
- TypeError: name cannot be callable - - :param cache: cache to store callable arguments and return values - :param str name: name given for callable (default None, automatic) - :param bool typed: cache different types separately (default False) - :param float expire: seconds until arguments expire - (default None, no expiry) - :param str tag: text to associate with arguments (default None) - :param set ignore: positional or keyword args to ignore (default ()) - :return: callable decorator - - """ - # Caution: Nearly identical code exists in DjangoCache.memoize - if callable(name): - raise TypeError('name cannot be callable') - - def decorator(func): - """Decorator created by memoize() for callable `func`.""" - base = (full_name(func),) if name is None else (name,) - - @ft.wraps(func) - def wrapper(*args, **kwargs): - """Wrapper for callable to cache arguments and return values.""" - key = wrapper.__cache_key__(*args, **kwargs) - result = self.get(key, default=ENOVAL, retry=True) - - if result is ENOVAL: - result = func(*args, **kwargs) - if expire is None or expire > 0: - self.set(key, result, expire, tag=tag, retry=True) - - return result - - def __cache_key__(*args, **kwargs): - """Make key for cache given function arguments.""" - return args_to_key(base, args, kwargs, typed, ignore) - - wrapper.__cache_key__ = __cache_key__ - return wrapper - - return decorator - - def check(self, fix=False, retry=False): - """Check database and file system consistency. - - Intended for use in testing and post-mortem error analysis. - - While checking the Cache table for consistency, a writer lock is held - on the database. The lock blocks other cache clients from writing to - the database. For caches with many file references, the lock may be - held for a long time. For example, local benchmarking shows that a - cache with 1,000 file references takes ~60ms to check. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool fix: correct inconsistencies - :param bool retry: retry if database timeout occurs (default False) - :return: list of warnings - :raises Timeout: if database timeout occurs - - """ - # pylint: disable=access-member-before-definition,W0201 - with warnings.catch_warnings(record=True) as warns: - sql = self._sql - - # Check integrity of database. - - rows = sql('PRAGMA integrity_check').fetchall() - - if len(rows) != 1 or rows[0][0] != 'ok': - for (message,) in rows: - warnings.warn(message) - - if fix: - sql('VACUUM') - - with self._transact(retry) as (sql, _): - - # Check Cache.filename against file system. - - filenames = set() - select = ( - 'SELECT rowid, size, filename FROM Cache' - ' WHERE filename IS NOT NULL' - ) - - rows = sql(select).fetchall() - - for rowid, size, filename in rows: - full_path = op.join(self._directory, filename) - filenames.add(full_path) - - if op.exists(full_path): - real_size = op.getsize(full_path) - - if size != real_size: - message = 'wrong file size: %s, %d != %d' - args = full_path, real_size, size - warnings.warn(message % args) - - if fix: - sql( - 'UPDATE Cache SET size = ?' - ' WHERE rowid = ?', - (real_size, rowid), - ) - - continue - - warnings.warn('file not found: %s' % full_path) - - if fix: - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - # Check file system against Cache.filename. 
- - for dirpath, _, files in os.walk(self._directory): - paths = [op.join(dirpath, filename) for filename in files] - error = set(paths) - filenames - - for full_path in error: - if DBNAME in full_path: - continue - - message = 'unknown file: %s' % full_path - warnings.warn(message, UnknownFileWarning) - - if fix: - os.remove(full_path) - - # Check for empty directories. - - for dirpath, dirs, files in os.walk(self._directory): - if not (dirs or files): - message = 'empty directory: %s' % dirpath - warnings.warn(message, EmptyDirWarning) - - if fix: - os.rmdir(dirpath) - - # Check Settings.count against count of Cache rows. - - self.reset('count') - ((count,),) = sql('SELECT COUNT(key) FROM Cache').fetchall() - - if self.count != count: - message = 'Settings.count != COUNT(Cache.key); %d != %d' - warnings.warn(message % (self.count, count)) - - if fix: - sql( - 'UPDATE Settings SET value = ? WHERE key = ?', - (count, 'count'), - ) - - # Check Settings.size against sum of Cache.size column. - - self.reset('size') - select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' - ((size,),) = sql(select_size).fetchall() - - if self.size != size: - message = 'Settings.size != SUM(Cache.size); %d != %d' - warnings.warn(message % (self.size, size)) - - if fix: - sql( - 'UPDATE Settings SET value = ? WHERE key =?', - (size, 'size'), - ) - - return warns - - def create_tag_index(self): - """Create tag index on cache database. - - It is better to initialize cache with `tag_index=True` than use this. - - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') - self.reset('tag_index', 1) - - def drop_tag_index(self): - """Drop tag index on cache database. - - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql('DROP INDEX IF EXISTS Cache_tag_rowid') - self.reset('tag_index', 0) - - def evict(self, tag, retry=False): - """Remove items with matching `tag` from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param str tag: tag identifying items - :param bool retry: retry if database timeout occurs (default False) - :return: count of rows removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, filename FROM Cache' - ' WHERE tag = ? AND rowid > ?' - ' ORDER BY rowid LIMIT ?' - ) - args = [tag, 0, 100] - return self._select_delete(select, args, arg_index=1, retry=retry) - - def expire(self, now=None, retry=False): - """Remove expired items from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). 
- - :param float now: current time (default None, ``time.time()`` used) - :param bool retry: retry if database timeout occurs (default False) - :return: count of items removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, expire_time, filename FROM Cache' - ' WHERE ? < expire_time AND expire_time < ?' - ' ORDER BY expire_time LIMIT ?' - ) - args = [0, now or time.time(), 100] - return self._select_delete(select, args, row_index=1, retry=retry) - - def cull(self, retry=False): - """Cull items from cache until volume is less than size limit. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool retry: retry if database timeout occurs (default False) - :return: count of items removed - :raises Timeout: if database timeout occurs - - """ - now = time.time() - - # Remove expired items. - - count = self.expire(now) - - # Remove items by policy. - - select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] - - if select_policy is None: - return 0 - - select_filename = select_policy.format(fields='filename', now=now) - - try: - while self.volume() > self.size_limit: - with self._transact(retry) as (sql, cleanup): - rows = sql(select_filename, (10,)).fetchall() - - if not rows: - break - - count += len(rows) - delete = ( - 'DELETE FROM Cache WHERE rowid IN (%s)' - % select_policy.format(fields='rowid', now=now) - ) - sql(delete, (10,)) - - for (filename,) in rows: - cleanup(filename) - except Timeout: - raise Timeout(count) from None - - return count - - def clear(self, retry=False): - """Remove all items from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool retry: retry if database timeout occurs (default False) - :return: count of rows removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, filename FROM Cache' - ' WHERE rowid > ?' - ' ORDER BY rowid LIMIT ?' - ) - args = [0, 100] - return self._select_delete(select, args, retry=retry) - - def _select_delete( - self, select, args, row_index=0, arg_index=0, retry=False - ): - count = 0 - delete = 'DELETE FROM Cache WHERE rowid IN (%s)' - - try: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, args).fetchall() - - if not rows: - break - - count += len(rows) - sql(delete % ','.join(str(row[0]) for row in rows)) - - for row in rows: - args[arg_index] = row[row_index] - cleanup(row[-1]) - - except Timeout: - raise Timeout(count) from None - - return count - - def iterkeys(self, reverse=False): - """Iterate Cache keys in database sort order. - - >>> cache = Cache() - >>> for key in [4, 1, 3, 0, 2]: - ... 
cache[key] = key - >>> list(cache.iterkeys()) - [0, 1, 2, 3, 4] - >>> list(cache.iterkeys(reverse=True)) - [4, 3, 2, 1, 0] - - :param bool reverse: reverse sort order (default False) - :return: iterator of Cache keys - - """ - sql = self._sql - limit = 100 - _disk_get = self._disk.get - - if reverse: - select = ( - 'SELECT key, raw FROM Cache' - ' ORDER BY key DESC, raw DESC LIMIT 1' - ) - iterate = ( - 'SELECT key, raw FROM Cache' - ' WHERE key = ? AND raw < ? OR key < ?' - ' ORDER BY key DESC, raw DESC LIMIT ?' - ) - else: - select = ( - 'SELECT key, raw FROM Cache' - ' ORDER BY key ASC, raw ASC LIMIT 1' - ) - iterate = ( - 'SELECT key, raw FROM Cache' - ' WHERE key = ? AND raw > ? OR key > ?' - ' ORDER BY key ASC, raw ASC LIMIT ?' - ) - - row = sql(select).fetchall() - - if row: - ((key, raw),) = row - else: - return - - yield _disk_get(key, raw) - - while True: - rows = sql(iterate, (key, raw, key, limit)).fetchall() - - if not rows: - break - - for key, raw in rows: - yield _disk_get(key, raw) - - def _iter(self, ascending=True): - sql = self._sql - rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() - ((max_rowid,),) = rows - yield # Signal ready. - - if max_rowid is None: - return - - bound = max_rowid + 1 - limit = 100 - _disk_get = self._disk.get - rowid = 0 if ascending else bound - select = ( - 'SELECT rowid, key, raw FROM Cache' - ' WHERE ? < rowid AND rowid < ?' - ' ORDER BY rowid %s LIMIT ?' - ) % ('ASC' if ascending else 'DESC') - - while True: - if ascending: - args = (rowid, bound, limit) - else: - args = (0, rowid, limit) - - rows = sql(select, args).fetchall() - - if not rows: - break - - for rowid, key, raw in rows: - yield _disk_get(key, raw) - - def __iter__(self): - """Iterate keys in cache including expired items.""" - iterator = self._iter() - next(iterator) - return iterator - - def __reversed__(self): - """Reverse iterate keys in cache including expired items.""" - iterator = self._iter(ascending=False) - next(iterator) - return iterator - - def stats(self, enable=True, reset=False): - """Return cache statistics hits and misses. - - :param bool enable: enable collecting statistics (default True) - :param bool reset: reset hits and misses to 0 (default False) - :return: (hits, misses) - - """ - # pylint: disable=E0203,W0201 - result = (self.reset('hits'), self.reset('misses')) - - if reset: - self.reset('hits', 0) - self.reset('misses', 0) - - self.reset('statistics', enable) - - return result - - def volume(self): - """Return estimated total size of cache on disk. - - :return: size in bytes - - """ - ((page_count,),) = self._sql('PRAGMA page_count').fetchall() - total_size = self._page_size * page_count + self.reset('size') - return total_size - - def close(self): - """Close database connection.""" - con = getattr(self._local, 'con', None) - - if con is None: - return - - con.close() - - try: - delattr(self._local, 'con') - except AttributeError: - pass - - def __enter__(self): - # Create connection in thread. - # pylint: disable=unused-variable - connection = self._con # noqa - return self - - def __exit__(self, *exception): - self.close() - - def __len__(self): - """Count of items in cache including expired items.""" - return self.reset('count') - - def __getstate__(self): - return (self.directory, self.timeout, type(self.disk)) - - def __setstate__(self, state): - self.__init__(*state) - - def reset(self, key, value=ENOVAL, update=True): - """Reset `key` and `value` item from Settings table. - - Use `reset` to update the value of Cache settings correctly. 
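A brief usage sketch of the statistics and sizing accessors shown above; the directory path and keys are illustrative only.

from diskcache import Cache

cache = Cache('/tmp/example-cache')   # illustrative directory
cache.stats(enable=True)              # start recording hits and misses
cache['alpha'] = 1
cache.get('alpha')                    # hit
cache.get('missing')                  # miss
hits, misses = cache.stats(reset=True)
print(hits, misses)                   # 1 1
print(len(cache))                     # item count, including expired items
print(cache.volume())                 # estimated on-disk size in bytes
cache.close()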
Cache - settings are stored in the Settings table of the SQLite database. If - `update` is ``False`` then no attempt is made to update the database. - - If `value` is not given, it is reloaded from the Settings - table. Otherwise, the Settings table is updated. - - Settings with the ``disk_`` prefix correspond to Disk - attributes. Updating the value will change the unprefixed attribute on - the associated Disk instance. - - Settings with the ``sqlite_`` prefix correspond to SQLite - pragmas. Updating the value will execute the corresponding PRAGMA - statement. - - SQLite PRAGMA statements may be executed before the Settings table - exists in the database by setting `update` to ``False``. - - :param str key: Settings key for item - :param value: value for item (optional) - :param bool update: update database Settings table (default True) - :return: updated value for item - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql_retry = self._sql_retry - - if value is ENOVAL: - select = 'SELECT value FROM Settings WHERE key = ?' - ((value,),) = sql_retry(select, (key,)).fetchall() - setattr(self, key, value) - return value - - if update: - statement = 'UPDATE Settings SET value = ? WHERE key = ?' - sql_retry(statement, (value, key)) - - if key.startswith('sqlite_'): - pragma = key[7:] - - # 2016-02-17 GrantJ - PRAGMA and isolation_level=None - # don't always play nicely together. Retry setting the - # PRAGMA. I think some PRAGMA statements expect to - # immediately take an EXCLUSIVE lock on the database. I - # can't find any documentation for this but without the - # retry, stress will intermittently fail with multiple - # processes. - - # 2018-11-05 GrantJ - Avoid setting pragma values that - # are already set. Pragma settings like auto_vacuum and - # journal_mode can take a long time or may not work after - # tables have been created. - - start = time.time() - while True: - try: - try: - ((old_value,),) = sql( - 'PRAGMA %s' % (pragma) - ).fetchall() - update = old_value != value - except ValueError: - update = True - if update: - sql('PRAGMA %s = %s' % (pragma, value)).fetchall() - break - except sqlite3.OperationalError as exc: - if str(exc) != 'database is locked': - raise - diff = time.time() - start - if diff > 60: - raise - time.sleep(0.001) - elif key.startswith('disk_'): - attr = key[5:] - setattr(self._disk, attr, value) - - setattr(self, key, value) - return value +"""Core disk and file backed cache API. +""" + +import codecs +import contextlib as cl +import errno +import functools as ft +import io +import json +import os +import os.path as op +import pickle +import pickletools +import sqlite3 +import struct +import tempfile +import threading +import time +import warnings +import zlib + + +def full_name(func): + """Return full name of `func` by adding the module and function name.""" + return func.__module__ + '.' 
+ func.__qualname__ + + +class Constant(tuple): + """Pretty display of immutable constant.""" + + def __new__(cls, name): + return tuple.__new__(cls, (name,)) + + def __repr__(self): + return '%s' % self[0] + + +DBNAME = 'cache.db' +ENOVAL = Constant('ENOVAL') +UNKNOWN = Constant('UNKNOWN') + +MODE_NONE = 0 +MODE_RAW = 1 +MODE_BINARY = 2 +MODE_TEXT = 3 +MODE_PICKLE = 4 + +DEFAULT_SETTINGS = { + 'statistics': 0, # False + 'tag_index': 0, # False + 'eviction_policy': 'least-recently-stored', + 'size_limit': 2**30, # 1gb + 'cull_limit': 10, + 'sqlite_auto_vacuum': 1, # FULL + 'sqlite_cache_size': 2**13, # 8,192 pages + 'sqlite_journal_mode': 'wal', + 'sqlite_mmap_size': 2**26, # 64mb + 'sqlite_synchronous': 1, # NORMAL + 'disk_min_file_size': 2**15, # 32kb + 'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, +} + +METADATA = { + 'count': 0, + 'size': 0, + 'hits': 0, + 'misses': 0, +} + +EVICTION_POLICY = { + 'none': { + 'init': None, + 'get': None, + 'cull': None, + }, + 'least-recently-stored': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_store_time ON' + ' Cache (store_time)' + ), + 'get': None, + 'cull': 'SELECT {fields} FROM Cache ORDER BY store_time LIMIT ?', + }, + 'least-recently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_time ON' + ' Cache (access_time)' + ), + 'get': 'access_time = {now}', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_time LIMIT ?', + }, + 'least-frequently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_count ON' + ' Cache (access_count)' + ), + 'get': 'access_count = access_count + 1', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_count LIMIT ?', + }, +} + + +class Disk: + """Cache key and value serialization for SQLite database and files.""" + + def __init__(self, directory, min_file_size=0, pickle_protocol=0): + """Initialize disk instance. + + :param str directory: directory path + :param int min_file_size: minimum size for file use + :param int pickle_protocol: pickle protocol for serialization + + """ + self._directory = directory + self.min_file_size = min_file_size + self.pickle_protocol = pickle_protocol + + def hash(self, key): + """Compute portable hash for `key`. + + :param key: key to hash + :return: hash value + + """ + mask = 0xFFFFFFFF + disk_key, _ = self.put(key) + type_disk_key = type(disk_key) + + if type_disk_key is sqlite3.Binary: + return zlib.adler32(disk_key) & mask + elif type_disk_key is str: + return zlib.adler32(disk_key.encode('utf-8')) & mask # noqa + elif type_disk_key is int: + return disk_key % mask + else: + assert type_disk_key is float + return zlib.adler32(struct.pack('!d', disk_key)) & mask + + def put(self, key): + """Convert `key` to fields key and raw for Cache table. + + :param key: key to convert + :return: (database key, raw boolean) pair + + """ + # pylint: disable=unidiomatic-typecheck + type_key = type(key) + + if type_key is bytes: + return sqlite3.Binary(key), True + elif ( + (type_key is str) + or ( + type_key is int + and -9223372036854775808 <= key <= 9223372036854775807 + ) + or (type_key is float) + ): + return key, True + else: + data = pickle.dumps(key, protocol=self.pickle_protocol) + result = pickletools.optimize(data) + return sqlite3.Binary(result), False + + def get(self, key, raw): + """Convert fields `key` and `raw` from Cache table to key. 
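The Disk.put/Disk.hash pair above maps arbitrary keys onto values SQLite can store; a minimal sketch, using a temporary directory purely for illustration.

import tempfile
from diskcache import Disk

disk = Disk(tempfile.mkdtemp())

# bytes, str, small ints, and floats are stored raw; everything else is pickled
print(disk.put(b'key'))        # binary blob, raw=True
print(disk.put('key'))         # ('key', True) -- stored as-is
print(disk.put((1, 2, 3)))     # pickled blob, raw=False

# hash() is a portable 32-bit value derived from the stored form
print(disk.hash('key'))
print(disk.hash((1, 2, 3)))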
+ + :param key: database key to convert + :param bool raw: flag indicating raw database storage + :return: corresponding Python key + + """ + # pylint: disable=unidiomatic-typecheck + if raw: + return bytes(key) if type(key) is sqlite3.Binary else key + else: + return pickle.load(io.BytesIO(key)) + + def store(self, value, read, key=UNKNOWN): + """Convert `value` to fields size, mode, filename, and value for Cache + table. + + :param value: value to convert + :param bool read: True when value is file-like object + :param key: key for item (default UNKNOWN) + :return: (size, mode, filename, value) tuple for Cache table + + """ + # pylint: disable=unidiomatic-typecheck + type_value = type(value) + min_file_size = self.min_file_size + + if ( + (type_value is str and len(value) < min_file_size) + or ( + type_value is int + and -9223372036854775808 <= value <= 9223372036854775807 + ) + or (type_value is float) + ): + return 0, MODE_RAW, None, value + elif type_value is bytes: + if len(value) < min_file_size: + return 0, MODE_RAW, None, sqlite3.Binary(value) + else: + filename, full_path = self.filename(key, value) + self._write(full_path, io.BytesIO(value), 'xb') + return len(value), MODE_BINARY, filename, None + elif type_value is str: + filename, full_path = self.filename(key, value) + self._write(full_path, io.StringIO(value), 'x', 'UTF-8') + size = op.getsize(full_path) + return size, MODE_TEXT, filename, None + elif read: + reader = ft.partial(value.read, 2**22) + filename, full_path = self.filename(key, value) + iterator = iter(reader, b'') + size = self._write(full_path, iterator, 'xb') + return size, MODE_BINARY, filename, None + else: + result = pickle.dumps(value, protocol=self.pickle_protocol) + + if len(result) < min_file_size: + return 0, MODE_PICKLE, None, sqlite3.Binary(result) + else: + filename, full_path = self.filename(key, value) + self._write(full_path, io.BytesIO(result), 'xb') + return len(result), MODE_PICKLE, filename, None + + def _write(self, full_path, iterator, mode, encoding=None): + full_dir, _ = op.split(full_path) + + for count in range(1, 11): + with cl.suppress(OSError): + os.makedirs(full_dir) + + try: + # Another cache may have deleted the directory before + # the file could be opened. + writer = open(full_path, mode, encoding=encoding) + except OSError: + if count == 10: + # Give up after 10 tries to open the file. + raise + continue + + with writer: + size = 0 + for chunk in iterator: + size += len(chunk) + writer.write(chunk) + return size + + def fetch(self, mode, filename, value, read): + """Convert fields `mode`, `filename`, and `value` from Cache table to + value. 
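Disk.store above keeps small values inline in the database and spills larger ones to files under the cache directory; a sketch of both paths, with an illustrative size threshold.

import tempfile
from diskcache import Disk

directory = tempfile.mkdtemp()
disk = Disk(directory, min_file_size=1024)   # values of 1 KB or more go to files

size, mode, filename, db_value = disk.store(b'tiny', read=False)
print(mode, filename)        # 1 (MODE_RAW), None: stored inline, no file

size, mode, filename, db_value = disk.store(b'x' * 4096, read=False)
print(mode, filename)        # 2 (MODE_BINARY), path of a generated .val file

value = disk.fetch(mode, filename, db_value, read=False)
print(len(value))            # 4096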
+ + :param int mode: value mode raw, binary, text, or pickle + :param str filename: filename of corresponding value + :param value: database value + :param bool read: when True, return an open file handle + :return: corresponding Python value + :raises: IOError if the value cannot be read + + """ + # pylint: disable=unidiomatic-typecheck,consider-using-with + if mode == MODE_RAW: + return bytes(value) if type(value) is sqlite3.Binary else value + elif mode == MODE_BINARY: + if read: + return open(op.join(self._directory, filename), 'rb') + else: + with open(op.join(self._directory, filename), 'rb') as reader: + return reader.read() + elif mode == MODE_TEXT: + full_path = op.join(self._directory, filename) + with open(full_path, 'r', encoding='UTF-8') as reader: + return reader.read() + elif mode == MODE_PICKLE: + if value is None: + with open(op.join(self._directory, filename), 'rb') as reader: + return pickle.load(reader) + else: + return pickle.load(io.BytesIO(value)) + + def filename(self, key=UNKNOWN, value=UNKNOWN): + """Return filename and full-path tuple for file storage. + + Filename will be a randomly generated 28 character hexadecimal string + with ".val" suffixed. Two levels of sub-directories will be used to + reduce the size of directories. On older filesystems, lookups in + directories with many files may be slow. + + The default implementation ignores the `key` and `value` parameters. + + In some scenarios, for example :meth:`Cache.push + `, the `key` or `value` may not be known when the + item is stored in the cache. + + :param key: key for item (default UNKNOWN) + :param value: value for item (default UNKNOWN) + + """ + # pylint: disable=unused-argument + hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') + sub_dir = op.join(hex_name[:2], hex_name[2:4]) + name = hex_name[4:] + '.val' + filename = op.join(sub_dir, name) + full_path = op.join(self._directory, filename) + return filename, full_path + + def remove(self, file_path): + """Remove a file given by `file_path`. + + This method is cross-thread and cross-process safe. If an OSError + occurs, it is suppressed. + + :param str file_path: relative path to file + + """ + full_path = op.join(self._directory, file_path) + full_dir, _ = op.split(full_path) + + # Suppress OSError that may occur if two caches attempt to delete the + # same file or directory at the same time. + + with cl.suppress(OSError): + os.remove(full_path) + + with cl.suppress(OSError): + os.removedirs(full_dir) + + +class JSONDisk(Disk): + """Cache key and value using JSON serialization with zlib compression.""" + + def __init__(self, directory, compress_level=1, **kwargs): + """Initialize JSON disk instance. + + Keys and values are compressed using the zlib library. The + `compress_level` is an integer from 0 to 9 controlling the level of + compression; 1 is fastest and produces the least compression, 9 is + slowest and produces the most compression, and 0 is no compression. 
+ + :param str directory: directory path + :param int compress_level: zlib compression level (default 1) + :param kwargs: super class arguments + + """ + self.compress_level = compress_level + super().__init__(directory, **kwargs) + + def put(self, key): + json_bytes = json.dumps(key).encode('utf-8') + data = zlib.compress(json_bytes, self.compress_level) + return super().put(data) + + def get(self, key, raw): + data = super().get(key, raw) + return json.loads(zlib.decompress(data).decode('utf-8')) + + def store(self, value, read, key=UNKNOWN): + if not read: + json_bytes = json.dumps(value).encode('utf-8') + value = zlib.compress(json_bytes, self.compress_level) + return super().store(value, read, key=key) + + def fetch(self, mode, filename, value, read): + data = super().fetch(mode, filename, value, read) + if not read: + data = json.loads(zlib.decompress(data).decode('utf-8')) + return data + + +class Timeout(Exception): + """Database timeout expired.""" + + +class UnknownFileWarning(UserWarning): + """Warning used by Cache.check for unknown files.""" + + +class EmptyDirWarning(UserWarning): + """Warning used by Cache.check for empty directories.""" + + +def args_to_key(base, args, kwargs, typed, ignore): + """Create cache key out of function arguments. + + :param tuple base: base of key + :param tuple args: function arguments + :param dict kwargs: function keyword arguments + :param bool typed: include types in cache key + :param set ignore: positional or keyword args to ignore + :return: cache key tuple + + """ + args = tuple(arg for index, arg in enumerate(args) if index not in ignore) + key = base + args + (None,) + + if kwargs: + kwargs = {key: val for key, val in kwargs.items() if key not in ignore} + sorted_items = sorted(kwargs.items()) + + for item in sorted_items: + key += item + + if typed: + key += tuple(type(arg) for arg in args) + + if kwargs: + key += tuple(type(value) for _, value in sorted_items) + + return key + + +class Cache: + """Disk and file backed cache.""" + + def __init__(self, directory=None, timeout=60, disk=Disk, **settings): + """Initialize cache instance. + + :param str directory: cache directory + :param float timeout: SQLite connection timeout + :param disk: Disk type or subclass for serialization + :param settings: any of DEFAULT_SETTINGS + + """ + try: + assert issubclass(disk, Disk) + except (TypeError, AssertionError): + raise ValueError('disk must subclass diskcache.Disk') from None + + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = str(directory) + directory = op.expanduser(directory) + directory = op.expandvars(directory) + + self._directory = directory + self._timeout = 0 # Manually handle retries during initialization. + self._local = threading.local() + self._txn_id = None + + if not op.isdir(directory): + try: + os.makedirs(directory, 0o755) + except OSError as error: + if error.errno != errno.EEXIST: + raise EnvironmentError( + error.errno, + 'Cache directory "%s" does not exist' + ' and could not be created' % self._directory, + ) from None + + sql = self._sql_retry + + # Setup Settings table. + + try: + current_settings = dict( + sql('SELECT key, value FROM Settings').fetchall() + ) + except sqlite3.OperationalError: + current_settings = {} + + sets = DEFAULT_SETTINGS.copy() + sets.update(current_settings) + sets.update(settings) + + for key in METADATA: + sets.pop(key, None) + + # Chance to set pragmas before any tables are created. 
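JSONDisk above can be selected when constructing a cache; settings prefixed with ``disk_`` are forwarded to the Disk subclass, so the compression level is passed as shown in this sketch (directory path illustrative).

from diskcache import Cache, JSONDisk

cache = Cache('/tmp/json-cache', disk=JSONDisk, disk_compress_level=6)
cache['config'] = {'threshold': 0.5, 'labels': ['a', 'b']}
print(cache['config'])   # round-trips through zlib-compressed JSON
cache.close()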
+ + for key, value in sorted(sets.items()): + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + sql( + 'CREATE TABLE IF NOT EXISTS Settings (' + ' key TEXT NOT NULL UNIQUE,' + ' value)' + ) + + # Setup Disk object (must happen after settings initialized). + + kwargs = { + key[5:]: value + for key, value in sets.items() + if key.startswith('disk_') + } + self._disk = disk(directory, **kwargs) + + # Set cached attributes: updates settings and sets pragmas. + + for key, value in sets.items(): + query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key, value) + + for key, value in METADATA.items(): + query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key) + + ((self._page_size,),) = sql('PRAGMA page_size').fetchall() + + # Setup Cache table. + + sql( + 'CREATE TABLE IF NOT EXISTS Cache (' + ' rowid INTEGER PRIMARY KEY,' + ' key BLOB,' + ' raw INTEGER,' + ' store_time REAL,' + ' expire_time REAL,' + ' access_time REAL,' + ' access_count INTEGER DEFAULT 0,' + ' tag BLOB,' + ' size INTEGER DEFAULT 0,' + ' mode INTEGER DEFAULT 0,' + ' filename TEXT,' + ' value BLOB)' + ) + + sql( + 'CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' + ' Cache(key, raw)' + ) + + sql( + 'CREATE INDEX IF NOT EXISTS Cache_expire_time ON' + ' Cache (expire_time)' + ) + + query = EVICTION_POLICY[self.eviction_policy]['init'] + + if query is not None: + sql(query) + + # Use triggers to keep Metadata updated. + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + 1' + ' WHERE key = "count"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - 1' + ' WHERE key = "count"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + NEW.size' + ' WHERE key = "size"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_update' + ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings' + ' SET value = value + NEW.size - OLD.size' + ' WHERE key = "size"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - OLD.size' + ' WHERE key = "size"; END' + ) + + # Create tag index if requested. + + if self.tag_index: # pylint: disable=no-member + self.create_tag_index() + else: + self.drop_tag_index() + + # Close and re-open database connection with given timeout. + + self.close() + self._timeout = timeout + self._sql # pylint: disable=pointless-statement + + @property + def directory(self): + """Cache directory.""" + return self._directory + + @property + def timeout(self): + """SQLite connection timeout value in seconds.""" + return self._timeout + + @property + def disk(self): + """Disk used for serialization.""" + return self._disk + + @property + def _con(self): + # Check process ID to support process forking. If the process + # ID changes, close the connection and update the process ID. 
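The settings bootstrap above accepts any of DEFAULT_SETTINGS as keyword arguments; ``sqlite_``-prefixed keys become PRAGMAs and are re-applied on every new connection. A sketch with a few illustrative overrides.

from diskcache import Cache

cache = Cache(
    '/tmp/tuned-cache',                     # illustrative path
    size_limit=2**28,                       # cull once the cache exceeds ~256 MB
    eviction_policy='least-recently-used',
    sqlite_mmap_size=2**27,                 # PRAGMA mmap_size
    sqlite_cache_size=2**14,                # PRAGMA cache_size (pages)
)
print(cache.eviction_policy, cache.size_limit)
cache.close()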
+ + local_pid = getattr(self._local, 'pid', None) + pid = os.getpid() + + if local_pid != pid: + self.close() + self._local.pid = pid + + con = getattr(self._local, 'con', None) + + if con is None: + con = self._local.con = sqlite3.connect( + op.join(self._directory, DBNAME), + timeout=self._timeout, + isolation_level=None, + ) + + # Some SQLite pragmas work on a per-connection basis so + # query the Settings table and reset the pragmas. The + # Settings table may not exist so catch and ignore the + # OperationalError that may occur. + + try: + select = 'SELECT key, value FROM Settings' + settings = con.execute(select).fetchall() + except sqlite3.OperationalError: + pass + else: + for key, value in settings: + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + return con + + @property + def _sql(self): + return self._con.execute + + @property + def _sql_retry(self): + sql = self._sql + + # 2018-11-01 GrantJ - Some SQLite builds/versions handle + # the SQLITE_BUSY return value and connection parameter + # "timeout" differently. For a more reliable duration, + # manually retry the statement for 60 seconds. Only used + # by statements which modify the database and do not use + # a transaction (like those in ``__init__`` or ``reset``). + # See Issue #85 for and tests/issue_85.py for more details. + + def _execute_with_retry(statement, *args, **kwargs): + start = time.time() + while True: + try: + return sql(statement, *args, **kwargs) + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + + return _execute_with_retry + + @cl.contextmanager + def transact(self, retry=False): + """Context manager to perform a transaction by locking the cache. + + While the cache is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> with cache.transact(): # Atomically increment two keys. + ... _ = cache.incr('total', 123.4) + ... _ = cache.incr('count', 1) + >>> with cache.transact(): # Atomically calculate average. + ... 
average = cache['total'] / cache['count'] + >>> average + 123.4 + + :param bool retry: retry if database timeout occurs (default False) + :return: context manager for use in `with` statement + :raises Timeout: if database timeout occurs + + """ + with self._transact(retry=retry): + yield + + @cl.contextmanager + def _transact(self, retry=False, filename=None): + _acquireLock() + try: + sql = self._sql + filenames = [] + _disk_remove = self._disk.remove + tid = threading.get_ident() + txn_id = self._txn_id + + if tid == txn_id: + begin = False + else: + while True: + try: + sql('BEGIN IMMEDIATE') + begin = True + self._txn_id = tid + break + except sqlite3.OperationalError: + if retry: + continue + if filename is not None: + _disk_remove(filename) + raise Timeout from None + + try: + yield sql, filenames.append + except BaseException: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('ROLLBACK') + raise + else: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('COMMIT') + for name in filenames: + if name is not None: + _disk_remove(name) + finally: + _releaseLock() + + def set(self, key, value, expire=None, read=False, tag=None, retry=False): + """Set `key` and `value` item in cache. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was set + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + # The order of SELECT, UPDATE, and INSERT is important below. + # + # Typical cache usage pattern is: + # + # value = cache.get(key) + # if value is None: + # value = expensive_calculation() + # cache.set(key, value) + # + # Cache.get does not evict expired keys to avoid writes during lookups. + # Commonly used/expired keys will therefore remain in the cache making + # an UPDATE the preferred path. + # + # The alternative is to assume the key is not present by first trying + # to INSERT and then handling the IntegrityError that occurs from + # violating the UNIQUE constraint. This optimistic approach was + # rejected based on the common cache usage pattern. + # + # INSERT OR REPLACE aka UPSERT is not used because the old filename may + # need cleanup. + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_filename),) = rows + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + def __setitem__(self, key, value): + """Set corresponding `value` for `key` in cache. 
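The _acquireLock and _releaseLock helpers called in _transact above are defined elsewhere in this patch. Purely as an illustration of the general pattern, and not the patch's exact code, a module-level lock can be coordinated with os.register_at_fork so that a child forked mid-transaction does not inherit a lock that is held forever.

import os
import threading

# Hypothetical module-level lock guarding SQLite transactions (illustrative only).
_lock = threading.Lock()

def _acquireLock():
    _lock.acquire()

def _releaseLock():
    _lock.release()

def _reinit_lock_after_fork():
    # The child inherits the parent's lock state; replace the lock so a fork
    # that happens mid-transaction cannot leave the child deadlocked.
    global _lock
    _lock = threading.Lock()

if hasattr(os, 'register_at_fork'):
    os.register_at_fork(
        before=_acquireLock,            # quiesce writers in the parent
        after_in_parent=_releaseLock,
        after_in_child=_reinit_lock_after_fork,
    )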
+ + :param key: key for item + :param value: value for item + :return: corresponding value + :raises KeyError: if key is not found + + """ + self.set(key, value, retry=True) + + def _row_update(self, rowid, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql( + 'UPDATE Cache SET' + ' store_time = ?,' + ' expire_time = ?,' + ' access_time = ?,' + ' access_count = ?,' + ' tag = ?,' + ' size = ?,' + ' mode = ?,' + ' filename = ?,' + ' value = ?' + ' WHERE rowid = ?', + ( + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + rowid, + ), + ) + + def _row_insert(self, key, raw, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql( + 'INSERT INTO Cache(' + ' key, raw, store_time, expire_time, access_time,' + ' access_count, tag, size, mode, filename, value' + ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + key, + raw, + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + ), + ) + + def _cull(self, now, sql, cleanup, limit=None): + cull_limit = self.cull_limit if limit is None else limit + + if cull_limit == 0: + return + + # Evict expired keys. + + select_expired_template = ( + 'SELECT %s FROM Cache' + ' WHERE expire_time IS NOT NULL AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + + select_expired = select_expired_template % 'filename' + rows = sql(select_expired, (now, cull_limit)).fetchall() + + if rows: + delete_expired = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_expired_template % 'rowid' + ) + sql(delete_expired, (now, cull_limit)) + + for (filename,) in rows: + cleanup(filename) + + cull_limit -= len(rows) + + if cull_limit == 0: + return + + # Evict keys by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None or self.volume() < self.size_limit: + return + + select_filename = select_policy.format(fields='filename', now=now) + rows = sql(select_filename, (cull_limit,)).fetchall() + + if rows: + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_policy.format(fields='rowid', now=now) + ) + sql(delete, (cull_limit,)) + + for (filename,) in rows: + cleanup(filename) + + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + + with self._transact(retry) as (sql, _): + rows = sql( + 'SELECT rowid, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_expire_time),) = rows + + if old_expire_time is None or old_expire_time > now: + sql( + 'UPDATE Cache SET expire_time = ? WHERE rowid = ?', + (expire_time, rowid), + ) + return True + + return False + + def add(self, key, value, expire=None, read=False, tag=None, retry=False): + """Add `key` and `value` item to cache. + + Similar to `set`, but only add to cache if key not present. + + Operation is atomic. 
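A short sketch of touch from above, which only extends the expiry of keys that have not already expired; the path and key names are illustrative.

from diskcache import Cache

cache = Cache('/tmp/touch-cache')          # illustrative path
cache.set('session', 'token', expire=30)   # expires in 30 seconds
if cache.touch('session', expire=300):     # push expiry out to 5 minutes
    print('session kept alive')
print(cache.touch('missing'))              # False: nothing to touch
cache.close()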
Only one concurrent add operation for a given key + will succeed. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was added + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_filename, old_expire_time),) = rows + + if old_expire_time is None or old_expire_time > now: + cleanup(filename) + return False + + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + def incr(self, key, delta=1, default=0, retry=False): + """Increment value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent increment operations will be + counted individually. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to increment (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, filename, value FROM Cache' + ' WHERE key = ? AND raw = ?' + ) + + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (db_key, raw)).fetchall() + + if not rows: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store( + value, False, key=key + ) + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + return value + + ((rowid, expire_time, filename, value),) = rows + + if expire_time is not None and expire_time < now: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store( + value, False, key=key + ) + self._row_update(rowid, now, columns) + self._cull(now, sql, cleanup) + cleanup(filename) + return value + + value += delta + + columns = 'store_time = ?, value = ?' + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + + if update_column is not None: + columns += ', ' + update_column.format(now=now) + + update = 'UPDATE Cache SET %s WHERE rowid = ?' 
% columns + sql(update, (now, value, rowid)) + + return value + + def decr(self, key, delta=1, default=0, retry=False): + """Decrement value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent decrement operations will be + counted individually. + + Unlike Memcached, negative values are supported. Value may be + decremented below zero. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to decrement (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + return self.incr(key, -delta, default, retry) + + def get( + self, + key, + default=None, + read=False, + expire_time=False, + tag=False, + retry=False, + ): + """Retrieve value from cache. If `key` is missing, return `default`. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool read: if True, return file handle to value + (default False) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = (default, None, None) + elif expire_time or tag: + default = (default, None) + + if not self.statistics and update_column is None: + # Fast path, no transaction necessary. + + rows = self._sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError: + # Key was deleted before we could retrieve result. + return default + + else: # Slow path, transaction required. + cache_hit = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "hits"' + ) + cache_miss = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' + ) + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + if self.statistics: + sql(cache_miss) + return default + + ( + (rowid, db_expire_time, db_tag, mode, filename, db_value), + ) = rows # noqa: E127 + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError: + # Key was deleted before we could retrieve result. + if self.statistics: + sql(cache_miss) + return default + + if self.statistics: + sql(cache_hit) + + now = time.time() + update = 'UPDATE Cache SET %s WHERE rowid = ?' 
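The atomic counters above are convenient for counts shared across threads and processes; a sketch with illustrative key names.

from diskcache import Cache

cache = Cache('/tmp/counter-cache')        # illustrative path
print(cache.incr('requests'))              # 1  (missing key starts at default=0)
print(cache.incr('requests', delta=5))     # 6
print(cache.decr('requests', delta=2))     # 4
try:
    cache.incr('unknown', default=None)    # default=None turns a miss into KeyError
except KeyError:
    print('no such counter')
cache.close()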
+ + if update_column is not None: + sql(update % update_column.format(now=now), (rowid,)) + + if expire_time and tag: + return (value, db_expire_time, db_tag) + elif expire_time: + return (value, db_expire_time) + elif tag: + return (value, db_tag) + else: + return value + + def __getitem__(self, key): + """Return corresponding value for `key` from cache. + + :param key: key matching item + :return: corresponding value + :raises KeyError: if key is not found + + """ + value = self.get(key, default=ENOVAL, retry=True) + if value is ENOVAL: + raise KeyError(key) + return value + + def read(self, key, retry=False): + """Return file handle value corresponding to `key` from cache. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: file open for reading in binary mode + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + handle = self.get(key, default=ENOVAL, read=True, retry=retry) + if handle is ENOVAL: + raise KeyError(key) + return handle + + def __contains__(self, key): + """Return `True` if `key` matching item is found in cache. + + :param key: key matching item + :return: True if key matching item + + """ + sql = self._sql + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + rows = sql(select, (db_key, raw, time.time())).fetchall() + + return bool(rows) + + def pop( + self, key, default=None, expire_time=False, tag=False, retry=False + ): # noqa: E501 + """Remove corresponding item for `key` from cache and return value. + + If `key` is missing, return `default`. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + try: + value = self._disk.fetch(mode, filename, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + return default + finally: + if filename is not None: + self._disk.remove(filename) + + if expire_time and tag: + return value, db_expire_time, db_tag + elif expire_time: + return value, db_expire_time + elif tag: + return value, db_tag + else: + return value + + def __delitem__(self, key, retry=True): + """Delete corresponding item for `key` from cache. 
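The extra flags on get above change the return shape, and read/Cache.read return an open file handle when the value lives in a file; a sketch with illustrative names.

import io
from diskcache import Cache

cache = Cache('/tmp/get-cache')                          # illustrative path
cache.set('report', io.BytesIO(b'raw bytes'), read=True, expire=60, tag='pdf')

value, expire_time, tag = cache.get('report', expire_time=True, tag=True)
print(tag)                                               # 'pdf'

with cache.read('report') as handle:                     # open file handle to the stored bytes
    print(handle.read())                                 # b'raw bytes'
cache.close()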
+ + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default `True`). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default True) + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + + with self._transact(retry) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)', + (db_key, raw, time.time()), + ).fetchall() + + if not rows: + raise KeyError(key) + + ((rowid, filename),) = rows + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(filename) + + return True + + def delete(self, key, retry=False): + """Delete corresponding item for `key` from cache. + + Missing keys are ignored. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was deleted + :raises Timeout: if database timeout occurs + + """ + # pylint: disable=unnecessary-dunder-call + try: + return self.__delitem__(key, retry=retry) + except KeyError: + return False + + def push( + self, + value, + prefix=None, + side='back', + expire=None, + read=False, + tag=None, + retry=False, + ): + """Push `value` onto `side` of queue identified by `prefix` in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + Defaults to pushing value on back of queue. Set side to 'front' to push + value on front of queue. Side must be one of 'back' or 'front'. + + Operation is atomic. Concurrent operations will be serialized. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull`. + + >>> cache = Cache() + >>> print(cache.push('first value')) + 500000000000000 + >>> cache.get(500000000000000) + 'first value' + >>> print(cache.push('second value')) + 500000000000001 + >>> print(cache.push('third value', side='front')) + 499999999999999 + >>> cache.push(1234, prefix='userids') + 'userids-500000000000000' + + :param value: value for item + :param str prefix: key prefix (default None, key is integer) + :param str side: either 'back' or 'front' (default 'back') + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: key for item in cache + :raises Timeout: if database timeout occurs + + """ + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + now = time.time() + raw = True + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read) + columns = (expire_time, tag, size, mode, filename, db_value) + order = {'back': 'DESC', 'front': 'ASC'} + select = ( + 'SELECT key FROM Cache' + ' WHERE ? < key AND key < ? AND raw = ?' 
+ ' ORDER BY key %s LIMIT 1' + ) % order[side] + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql(select, (min_key, max_key, raw)).fetchall() + + if rows: + ((key,),) = rows + + if prefix is not None: + num = int(key[(key.rfind('-') + 1) :]) + else: + num = key + + if side == 'back': + num += 1 + else: + assert side == 'front' + num -= 1 + else: + num = 500000000000000 + + if prefix is not None: + db_key = '{0}-{1:015d}'.format(prefix, num) + else: + db_key = num + + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + + return db_key + + def pull( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): + """Pull key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to pulling key and value item pairs from front of queue. Set + side to 'back' to pull from back of queue. Side must be one of 'front' + or 'back'. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.push` and `Cache.get`. + + >>> cache = Cache() + >>> cache.pull() + (None, None) + >>> for letter in 'abc': + ... print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.pull() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> _, value = cache.pull(side='back') + >>> value + 'c' + >>> cache.push(1234, 'userids') + 'userids-500000000000000' + >>> _, value = cache.pull('userids') + >>> value + 1234 + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.peek + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + if db_expire is not None and db_expire < time.time(): + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. 
+ continue + finally: + if name is not None: + self._disk.remove(name) + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def peek( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): + """Peek at key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to peeking at key and value item pairs from front of queue. + Set side to 'back' to pull from back of queue. Side must be one of + 'front' or 'back'. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull` and `Cache.push`. + + >>> cache = Cache() + >>> for letter in 'abc': + ... print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.peek() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> key, value = cache.peek(side='back') + >>> print(key) + 500000000000002 + >>> value + 'c' + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.pull + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + continue + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def peekitem(self, last=True, expire_time=False, tag=False, retry=False): + """Peek at key and value item pair in cache based on iteration order. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
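Unlike pull, peek above leaves the item in the queue; a sketch contrasting the two, with an illustrative prefix.

from diskcache import Cache

cache = Cache('/tmp/queue-cache')          # illustrative path
for job in ('resize', 'upload'):
    cache.push(job, prefix='jobs')

key, value = cache.peek(prefix='jobs')     # inspect without removing
print(value)                               # 'resize'
key, value = cache.pull(prefix='jobs')     # remove and return the same item
print(value)                               # 'resize'
print(cache.peek(prefix='jobs')[1])        # 'upload'
cache.close()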
+ + >>> cache = Cache() + >>> for num, letter in enumerate('abc'): + ... cache[letter] = num + >>> cache.peekitem() + ('c', 2) + >>> cache.peekitem(last=False) + ('a', 0) + + :param bool last: last item in iteration order (default True) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair + :raises KeyError: if cache is empty + :raises Timeout: if database timeout occurs + + """ + order = ('ASC', 'DESC') + select = ( + 'SELECT rowid, key, raw, expire_time, tag, mode, filename, value' + ' FROM Cache ORDER BY rowid %s LIMIT 1' + ) % order[last] + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select).fetchall() + + if not rows: + raise KeyError('dictionary is empty') + + ( + ( + rowid, + db_key, + raw, + db_expire, + db_tag, + mode, + name, + db_value, + ), + ) = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + key = self._disk.get(db_key, raw) + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + continue + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def memoize( + self, name=None, typed=False, expire=None, tag=None, ignore=() + ): + """Memoizing cache decorator. + + Decorator to wrap callable with memoizing function using cache. + Repeated calls with the same arguments will lookup result in cache and + avoid function evaluation. + + If name is set to None (default), the callable name will be determined + automatically. + + When expire is set to zero, function results will not be set in the + cache. Cache lookups still occur, however. Read + :doc:`case-study-landing-page-caching` for example usage. + + If typed is set to True, function arguments of different types will be + cached separately. For example, f(3) and f(3.0) will be treated as + distinct calls with distinct results. + + The original underlying function is accessible through the __wrapped__ + attribute. This is useful for introspection, for bypassing the cache, + or for rewrapping the function with a different cache. + + >>> from diskcache import Cache + >>> cache = Cache() + >>> @cache.memoize(expire=1, tag='fib') + ... def fibonacci(number): + ... if number == 0: + ... return 0 + ... elif number == 1: + ... return 1 + ... else: + ... return fibonacci(number - 1) + fibonacci(number - 2) + >>> print(fibonacci(100)) + 354224848179261915075 + + An additional `__cache_key__` attribute can be used to generate the + cache key used for the given arguments. + + >>> key = fibonacci.__cache_key__(100) + >>> print(cache[key]) + 354224848179261915075 + + Remember to call memoize when decorating a callable. If you forget, + then a TypeError will occur. Note the lack of parenthenses after + memoize below: + + >>> @cache.memoize + ... def test(): + ... pass + Traceback (most recent call last): + ... 
+ TypeError: name cannot be callable + + :param cache: cache to store callable arguments and return values + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param float expire: seconds until arguments expire + (default None, no expiry) + :param str tag: text to associate with arguments (default None) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in DjangoCache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + """Decorator created by memoize() for callable `func`.""" + base = (full_name(func),) if name is None else (name,) + + @ft.wraps(func) + def wrapper(*args, **kwargs): + """Wrapper for callable to cache arguments and return values.""" + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, default=ENOVAL, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + if expire is None or expire > 0: + self.set(key, result, expire, tag=tag, retry=True) + + return result + + def __cache_key__(*args, **kwargs): + """Make key for cache given function arguments.""" + return args_to_key(base, args, kwargs, typed, ignore) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator + + def check(self, fix=False, retry=False): + """Check database and file system consistency. + + Intended for use in testing and post-mortem error analysis. + + While checking the Cache table for consistency, a writer lock is held + on the database. The lock blocks other cache clients from writing to + the database. For caches with many file references, the lock may be + held for a long time. For example, local benchmarking shows that a + cache with 1,000 file references takes ~60ms to check. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) + :return: list of warnings + :raises Timeout: if database timeout occurs + + """ + # pylint: disable=access-member-before-definition,W0201 + with warnings.catch_warnings(record=True) as warns: + sql = self._sql + + # Check integrity of database. + + rows = sql('PRAGMA integrity_check').fetchall() + + if len(rows) != 1 or rows[0][0] != 'ok': + for (message,) in rows: + warnings.warn(message) + + if fix: + sql('VACUUM') + + with self._transact(retry) as (sql, _): + + # Check Cache.filename against file system. + + filenames = set() + select = ( + 'SELECT rowid, size, filename FROM Cache' + ' WHERE filename IS NOT NULL' + ) + + rows = sql(select).fetchall() + + for rowid, size, filename in rows: + full_path = op.join(self._directory, filename) + filenames.add(full_path) + + if op.exists(full_path): + real_size = op.getsize(full_path) + + if size != real_size: + message = 'wrong file size: %s, %d != %d' + args = full_path, real_size, size + warnings.warn(message % args) + + if fix: + sql( + 'UPDATE Cache SET size = ?' + ' WHERE rowid = ?', + (real_size, rowid), + ) + + continue + + warnings.warn('file not found: %s' % full_path) + + if fix: + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + # Check file system against Cache.filename. 
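Beyond the doctest above, the typed and ignore parameters control how the cache key is built from the call arguments; a sketch using an illustrative function.

from diskcache import Cache

cache = Cache('/tmp/memo-cache')           # illustrative path

@cache.memoize(typed=True, ignore={'verbose'})
def area(width, height, verbose=False):
    if verbose:
        print('computing', width, height)
    return width * height

area(3, 4, verbose=True)                   # computed and cached
area(3, 4)                                 # served from cache; 'verbose' is not part of the key
area(3.0, 4.0)                             # typed=True: float arguments get their own entry
print(cache[area.__cache_key__(3, 4)])     # 12
cache.close()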
+ + for dirpath, _, files in os.walk(self._directory): + paths = [op.join(dirpath, filename) for filename in files] + error = set(paths) - filenames + + for full_path in error: + if DBNAME in full_path: + continue + + message = 'unknown file: %s' % full_path + warnings.warn(message, UnknownFileWarning) + + if fix: + os.remove(full_path) + + # Check for empty directories. + + for dirpath, dirs, files in os.walk(self._directory): + if not (dirs or files): + message = 'empty directory: %s' % dirpath + warnings.warn(message, EmptyDirWarning) + + if fix: + os.rmdir(dirpath) + + # Check Settings.count against count of Cache rows. + + self.reset('count') + ((count,),) = sql('SELECT COUNT(key) FROM Cache').fetchall() + + if self.count != count: + message = 'Settings.count != COUNT(Cache.key); %d != %d' + warnings.warn(message % (self.count, count)) + + if fix: + sql( + 'UPDATE Settings SET value = ? WHERE key = ?', + (count, 'count'), + ) + + # Check Settings.size against sum of Cache.size column. + + self.reset('size') + select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' + ((size,),) = sql(select_size).fetchall() + + if self.size != size: + message = 'Settings.size != SUM(Cache.size); %d != %d' + warnings.warn(message % (self.size, size)) + + if fix: + sql( + 'UPDATE Settings SET value = ? WHERE key =?', + (size, 'size'), + ) + + return warns + + def create_tag_index(self): + """Create tag index on cache database. + + It is better to initialize cache with `tag_index=True` than use this. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') + self.reset('tag_index', 1) + + def drop_tag_index(self): + """Drop tag index on cache database. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('DROP INDEX IF EXISTS Cache_tag_rowid') + self.reset('tag_index', 0) + + def evict(self, tag, retry=False): + """Remove items with matching `tag` from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE tag = ? AND rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [tag, 0, 100] + return self._select_delete(select, args, arg_index=1, retry=retry) + + def expire(self, now=None, retry=False): + """Remove expired items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
+ + :param float now: current time (default None, ``time.time()`` used) + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, expire_time, filename FROM Cache' + ' WHERE ? < expire_time AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + args = [0, now or time.time(), 100] + return self._select_delete(select, args, row_index=1, retry=retry) + + def cull(self, retry=False): + """Cull items from cache until volume is less than size limit. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + now = time.time() + + # Remove expired items. + + count = self.expire(now) + + # Remove items by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None: + return 0 + + select_filename = select_policy.format(fields='filename', now=now) + + try: + while self.volume() > self.size_limit: + with self._transact(retry) as (sql, cleanup): + rows = sql(select_filename, (10,)).fetchall() + + if not rows: + break + + count += len(rows) + delete = ( + 'DELETE FROM Cache WHERE rowid IN (%s)' + % select_policy.format(fields='rowid', now=now) + ) + sql(delete, (10,)) + + for (filename,) in rows: + cleanup(filename) + except Timeout: + raise Timeout(count) from None + + return count + + def clear(self, retry=False): + """Remove all items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [0, 100] + return self._select_delete(select, args, retry=retry) + + def _select_delete( + self, select, args, row_index=0, arg_index=0, retry=False + ): + count = 0 + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' + + try: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, args).fetchall() + + if not rows: + break + + count += len(rows) + sql(delete % ','.join(str(row[0]) for row in rows)) + + for row in rows: + args[arg_index] = row[row_index] + cleanup(row[-1]) + + except Timeout: + raise Timeout(count) from None + + return count + + def iterkeys(self, reverse=False): + """Iterate Cache keys in database sort order. + + >>> cache = Cache() + >>> for key in [4, 1, 3, 0, 2]: + ... 
cache[key] = key + >>> list(cache.iterkeys()) + [0, 1, 2, 3, 4] + >>> list(cache.iterkeys(reverse=True)) + [4, 3, 2, 1, 0] + + :param bool reverse: reverse sort order (default False) + :return: iterator of Cache keys + + """ + sql = self._sql + limit = 100 + _disk_get = self._disk.get + + if reverse: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key DESC, raw DESC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw < ? OR key < ?' + ' ORDER BY key DESC, raw DESC LIMIT ?' + ) + else: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key ASC, raw ASC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw > ? OR key > ?' + ' ORDER BY key ASC, raw ASC LIMIT ?' + ) + + row = sql(select).fetchall() + + if row: + ((key, raw),) = row + else: + return + + yield _disk_get(key, raw) + + while True: + rows = sql(iterate, (key, raw, key, limit)).fetchall() + + if not rows: + break + + for key, raw in rows: + yield _disk_get(key, raw) + + def _iter(self, ascending=True): + sql = self._sql + rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() + ((max_rowid,),) = rows + yield # Signal ready. + + if max_rowid is None: + return + + bound = max_rowid + 1 + limit = 100 + _disk_get = self._disk.get + rowid = 0 if ascending else bound + select = ( + 'SELECT rowid, key, raw FROM Cache' + ' WHERE ? < rowid AND rowid < ?' + ' ORDER BY rowid %s LIMIT ?' + ) % ('ASC' if ascending else 'DESC') + + while True: + if ascending: + args = (rowid, bound, limit) + else: + args = (0, rowid, limit) + + rows = sql(select, args).fetchall() + + if not rows: + break + + for rowid, key, raw in rows: + yield _disk_get(key, raw) + + def __iter__(self): + """Iterate keys in cache including expired items.""" + iterator = self._iter() + next(iterator) + return iterator + + def __reversed__(self): + """Reverse iterate keys in cache including expired items.""" + iterator = self._iter(ascending=False) + next(iterator) + return iterator + + def stats(self, enable=True, reset=False): + """Return cache statistics hits and misses. + + :param bool enable: enable collecting statistics (default True) + :param bool reset: reset hits and misses to 0 (default False) + :return: (hits, misses) + + """ + # pylint: disable=E0203,W0201 + result = (self.reset('hits'), self.reset('misses')) + + if reset: + self.reset('hits', 0) + self.reset('misses', 0) + + self.reset('statistics', enable) + + return result + + def volume(self): + """Return estimated total size of cache on disk. + + :return: size in bytes + + """ + ((page_count,),) = self._sql('PRAGMA page_count').fetchall() + total_size = self._page_size * page_count + self.reset('size') + return total_size + + def close(self): + """Close database connection.""" + con = getattr(self._local, 'con', None) + + if con is None: + return + + con.close() + + try: + delattr(self._local, 'con') + except AttributeError: + pass + + def __enter__(self): + # Create connection in thread. + # pylint: disable=unused-variable + connection = self._con # noqa + return self + + def __exit__(self, *exception): + self.close() + + def __len__(self): + """Count of items in cache including expired items.""" + return self.reset('count') + + def __getstate__(self): + return (self.directory, self.timeout, type(self.disk)) + + def __setstate__(self, state): + self.__init__(*state) + + def reset(self, key, value=ENOVAL, update=True): + """Reset `key` and `value` item from Settings table. + + Use `reset` to update the value of Cache settings correctly. 
Cache + settings are stored in the Settings table of the SQLite database. If + `update` is ``False`` then no attempt is made to update the database. + + If `value` is not given, it is reloaded from the Settings + table. Otherwise, the Settings table is updated. + + Settings with the ``disk_`` prefix correspond to Disk + attributes. Updating the value will change the unprefixed attribute on + the associated Disk instance. + + Settings with the ``sqlite_`` prefix correspond to SQLite + pragmas. Updating the value will execute the corresponding PRAGMA + statement. + + SQLite PRAGMA statements may be executed before the Settings table + exists in the database by setting `update` to ``False``. + + :param str key: Settings key for item + :param value: value for item (optional) + :param bool update: update database Settings table (default True) + :return: updated value for item + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql_retry = self._sql_retry + + if value is ENOVAL: + select = 'SELECT value FROM Settings WHERE key = ?' + ((value,),) = sql_retry(select, (key,)).fetchall() + setattr(self, key, value) + return value + + if update: + statement = 'UPDATE Settings SET value = ? WHERE key = ?' + sql_retry(statement, (value, key)) + + if key.startswith('sqlite_'): + pragma = key[7:] + + # 2016-02-17 GrantJ - PRAGMA and isolation_level=None + # don't always play nicely together. Retry setting the + # PRAGMA. I think some PRAGMA statements expect to + # immediately take an EXCLUSIVE lock on the database. I + # can't find any documentation for this but without the + # retry, stress will intermittently fail with multiple + # processes. + + # 2018-11-05 GrantJ - Avoid setting pragma values that + # are already set. Pragma settings like auto_vacuum and + # journal_mode can take a long time or may not work after + # tables have been created. + + start = time.time() + while True: + try: + try: + ((old_value,),) = sql( + 'PRAGMA %s' % (pragma) + ).fetchall() + update = old_value != value + except ValueError: + update = True + if update: + sql('PRAGMA %s = %s' % (pragma, value)).fetchall() + break + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + elif key.startswith('disk_'): + attr = key[5:] + setattr(self._disk, attr, value) + + setattr(self, key, value) + return value + +if hasattr(os, 'register_at_fork'): + _lock = threading.RLock() + + def _acquireLock(): + global _lock + if _lock: + try: + _lock.acquire() + except BaseException: + _lock.release() + raise + + def _releaseLock(): + global _lock + if _lock: + _lock.release() + + def _after_at_fork_child_reinit_locks(): + global _lock + _lock = threading.RLock() + + os.register_at_fork(before=_acquireLock, + after_in_child=_after_at_fork_child_reinit_locks, + after_in_parent=_releaseLock) +else: + def _acquireLock(): + pass + + def _releaseLock(): + pass diff --git a/tests/test_fork_multithreading.py b/tests/test_fork_multithreading.py new file mode 100644 index 0000000..d103462 --- /dev/null +++ b/tests/test_fork_multithreading.py @@ -0,0 +1,74 @@ +""" +Test diskcache.core.Cache behaviour when process is forking. +Make sure it does not deadlock on the sqlite3 transaction lock if +forked while the lock is in use. 
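+
+The test starts a background thread that continuously calls cache.set()
+while the main thread forks repeatedly; each child process performs a
+single cache.set() and exits, and a child that does not finish within
+10 seconds is killed and reported as a deadlock.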
+""" + +import errno +import hashlib +import io +import os +import os.path as op +import sys +import pathlib +import pickle +import shutil +import sqlite3 +import subprocess as sp +import tempfile +import threading +import time +import warnings +from threading import Thread +from unittest import mock + +if sys.platform != "win32": + import signal + +import pytest + +import diskcache as dc + +REPEATS = 1000 + +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) + +def _test_thread_imp(cache): + for i in range(REPEATS * 10): + cache.set(i, i) + +def _test_wait_pid(pid): + _, status = os.waitpid(pid, 0) + assert status == 0, "Child died unexpectedly" + +@pytest.mark.skipif(sys.platform == "win32", reason="skips this test on Windows") +def test_fork_multithreading(cache): + thread = Thread(target=_test_thread_imp, args=(cache,)) + thread.start() + try: + for i in range(REPEATS): + pid = os.fork() + if pid == 0: + cache.set(i, 0) + os._exit(0) + else: + thread = Thread(target=_test_wait_pid, args=(pid,)) + thread.start() + thread.join(timeout=10) + if thread.is_alive(): + os.kill(pid, signal.SIGKILL) + thread.join() + assert False, "Deadlock detected." + except OSError as e: + if e.errno != errno.EINTR: + raise + + thread.join() + +with dc.Cache() as cache: + test_fork_multithreading(cache) +shutil.rmtree(cache.directory, ignore_errors=True) From 70a916e2d3360e5e1e3930436c13790e57f2d98d Mon Sep 17 00:00:00 2001 From: Ariel Eizenberg Date: Mon, 25 Dec 2023 07:45:26 +0200 Subject: [PATCH 2/5] Fix accidental change of LF to CR-LF Previous commit was on Windows after testing there too --- diskcache/core.py | 4974 ++++++++++++++++++++++----------------------- 1 file changed, 2487 insertions(+), 2487 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index 80911a7..6633e93 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -1,2487 +1,2487 @@ -"""Core disk and file backed cache API. -""" - -import codecs -import contextlib as cl -import errno -import functools as ft -import io -import json -import os -import os.path as op -import pickle -import pickletools -import sqlite3 -import struct -import tempfile -import threading -import time -import warnings -import zlib - - -def full_name(func): - """Return full name of `func` by adding the module and function name.""" - return func.__module__ + '.' 
+ func.__qualname__ - - -class Constant(tuple): - """Pretty display of immutable constant.""" - - def __new__(cls, name): - return tuple.__new__(cls, (name,)) - - def __repr__(self): - return '%s' % self[0] - - -DBNAME = 'cache.db' -ENOVAL = Constant('ENOVAL') -UNKNOWN = Constant('UNKNOWN') - -MODE_NONE = 0 -MODE_RAW = 1 -MODE_BINARY = 2 -MODE_TEXT = 3 -MODE_PICKLE = 4 - -DEFAULT_SETTINGS = { - 'statistics': 0, # False - 'tag_index': 0, # False - 'eviction_policy': 'least-recently-stored', - 'size_limit': 2**30, # 1gb - 'cull_limit': 10, - 'sqlite_auto_vacuum': 1, # FULL - 'sqlite_cache_size': 2**13, # 8,192 pages - 'sqlite_journal_mode': 'wal', - 'sqlite_mmap_size': 2**26, # 64mb - 'sqlite_synchronous': 1, # NORMAL - 'disk_min_file_size': 2**15, # 32kb - 'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, -} - -METADATA = { - 'count': 0, - 'size': 0, - 'hits': 0, - 'misses': 0, -} - -EVICTION_POLICY = { - 'none': { - 'init': None, - 'get': None, - 'cull': None, - }, - 'least-recently-stored': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_store_time ON' - ' Cache (store_time)' - ), - 'get': None, - 'cull': 'SELECT {fields} FROM Cache ORDER BY store_time LIMIT ?', - }, - 'least-recently-used': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_access_time ON' - ' Cache (access_time)' - ), - 'get': 'access_time = {now}', - 'cull': 'SELECT {fields} FROM Cache ORDER BY access_time LIMIT ?', - }, - 'least-frequently-used': { - 'init': ( - 'CREATE INDEX IF NOT EXISTS Cache_access_count ON' - ' Cache (access_count)' - ), - 'get': 'access_count = access_count + 1', - 'cull': 'SELECT {fields} FROM Cache ORDER BY access_count LIMIT ?', - }, -} - - -class Disk: - """Cache key and value serialization for SQLite database and files.""" - - def __init__(self, directory, min_file_size=0, pickle_protocol=0): - """Initialize disk instance. - - :param str directory: directory path - :param int min_file_size: minimum size for file use - :param int pickle_protocol: pickle protocol for serialization - - """ - self._directory = directory - self.min_file_size = min_file_size - self.pickle_protocol = pickle_protocol - - def hash(self, key): - """Compute portable hash for `key`. - - :param key: key to hash - :return: hash value - - """ - mask = 0xFFFFFFFF - disk_key, _ = self.put(key) - type_disk_key = type(disk_key) - - if type_disk_key is sqlite3.Binary: - return zlib.adler32(disk_key) & mask - elif type_disk_key is str: - return zlib.adler32(disk_key.encode('utf-8')) & mask # noqa - elif type_disk_key is int: - return disk_key % mask - else: - assert type_disk_key is float - return zlib.adler32(struct.pack('!d', disk_key)) & mask - - def put(self, key): - """Convert `key` to fields key and raw for Cache table. - - :param key: key to convert - :return: (database key, raw boolean) pair - - """ - # pylint: disable=unidiomatic-typecheck - type_key = type(key) - - if type_key is bytes: - return sqlite3.Binary(key), True - elif ( - (type_key is str) - or ( - type_key is int - and -9223372036854775808 <= key <= 9223372036854775807 - ) - or (type_key is float) - ): - return key, True - else: - data = pickle.dumps(key, protocol=self.pickle_protocol) - result = pickletools.optimize(data) - return sqlite3.Binary(result), False - - def get(self, key, raw): - """Convert fields `key` and `raw` from Cache table to key. 
- - :param key: database key to convert - :param bool raw: flag indicating raw database storage - :return: corresponding Python key - - """ - # pylint: disable=unidiomatic-typecheck - if raw: - return bytes(key) if type(key) is sqlite3.Binary else key - else: - return pickle.load(io.BytesIO(key)) - - def store(self, value, read, key=UNKNOWN): - """Convert `value` to fields size, mode, filename, and value for Cache - table. - - :param value: value to convert - :param bool read: True when value is file-like object - :param key: key for item (default UNKNOWN) - :return: (size, mode, filename, value) tuple for Cache table - - """ - # pylint: disable=unidiomatic-typecheck - type_value = type(value) - min_file_size = self.min_file_size - - if ( - (type_value is str and len(value) < min_file_size) - or ( - type_value is int - and -9223372036854775808 <= value <= 9223372036854775807 - ) - or (type_value is float) - ): - return 0, MODE_RAW, None, value - elif type_value is bytes: - if len(value) < min_file_size: - return 0, MODE_RAW, None, sqlite3.Binary(value) - else: - filename, full_path = self.filename(key, value) - self._write(full_path, io.BytesIO(value), 'xb') - return len(value), MODE_BINARY, filename, None - elif type_value is str: - filename, full_path = self.filename(key, value) - self._write(full_path, io.StringIO(value), 'x', 'UTF-8') - size = op.getsize(full_path) - return size, MODE_TEXT, filename, None - elif read: - reader = ft.partial(value.read, 2**22) - filename, full_path = self.filename(key, value) - iterator = iter(reader, b'') - size = self._write(full_path, iterator, 'xb') - return size, MODE_BINARY, filename, None - else: - result = pickle.dumps(value, protocol=self.pickle_protocol) - - if len(result) < min_file_size: - return 0, MODE_PICKLE, None, sqlite3.Binary(result) - else: - filename, full_path = self.filename(key, value) - self._write(full_path, io.BytesIO(result), 'xb') - return len(result), MODE_PICKLE, filename, None - - def _write(self, full_path, iterator, mode, encoding=None): - full_dir, _ = op.split(full_path) - - for count in range(1, 11): - with cl.suppress(OSError): - os.makedirs(full_dir) - - try: - # Another cache may have deleted the directory before - # the file could be opened. - writer = open(full_path, mode, encoding=encoding) - except OSError: - if count == 10: - # Give up after 10 tries to open the file. - raise - continue - - with writer: - size = 0 - for chunk in iterator: - size += len(chunk) - writer.write(chunk) - return size - - def fetch(self, mode, filename, value, read): - """Convert fields `mode`, `filename`, and `value` from Cache table to - value. 
- - :param int mode: value mode raw, binary, text, or pickle - :param str filename: filename of corresponding value - :param value: database value - :param bool read: when True, return an open file handle - :return: corresponding Python value - :raises: IOError if the value cannot be read - - """ - # pylint: disable=unidiomatic-typecheck,consider-using-with - if mode == MODE_RAW: - return bytes(value) if type(value) is sqlite3.Binary else value - elif mode == MODE_BINARY: - if read: - return open(op.join(self._directory, filename), 'rb') - else: - with open(op.join(self._directory, filename), 'rb') as reader: - return reader.read() - elif mode == MODE_TEXT: - full_path = op.join(self._directory, filename) - with open(full_path, 'r', encoding='UTF-8') as reader: - return reader.read() - elif mode == MODE_PICKLE: - if value is None: - with open(op.join(self._directory, filename), 'rb') as reader: - return pickle.load(reader) - else: - return pickle.load(io.BytesIO(value)) - - def filename(self, key=UNKNOWN, value=UNKNOWN): - """Return filename and full-path tuple for file storage. - - Filename will be a randomly generated 28 character hexadecimal string - with ".val" suffixed. Two levels of sub-directories will be used to - reduce the size of directories. On older filesystems, lookups in - directories with many files may be slow. - - The default implementation ignores the `key` and `value` parameters. - - In some scenarios, for example :meth:`Cache.push - `, the `key` or `value` may not be known when the - item is stored in the cache. - - :param key: key for item (default UNKNOWN) - :param value: value for item (default UNKNOWN) - - """ - # pylint: disable=unused-argument - hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') - sub_dir = op.join(hex_name[:2], hex_name[2:4]) - name = hex_name[4:] + '.val' - filename = op.join(sub_dir, name) - full_path = op.join(self._directory, filename) - return filename, full_path - - def remove(self, file_path): - """Remove a file given by `file_path`. - - This method is cross-thread and cross-process safe. If an OSError - occurs, it is suppressed. - - :param str file_path: relative path to file - - """ - full_path = op.join(self._directory, file_path) - full_dir, _ = op.split(full_path) - - # Suppress OSError that may occur if two caches attempt to delete the - # same file or directory at the same time. - - with cl.suppress(OSError): - os.remove(full_path) - - with cl.suppress(OSError): - os.removedirs(full_dir) - - -class JSONDisk(Disk): - """Cache key and value using JSON serialization with zlib compression.""" - - def __init__(self, directory, compress_level=1, **kwargs): - """Initialize JSON disk instance. - - Keys and values are compressed using the zlib library. The - `compress_level` is an integer from 0 to 9 controlling the level of - compression; 1 is fastest and produces the least compression, 9 is - slowest and produces the most compression, and 0 is no compression. 
- - :param str directory: directory path - :param int compress_level: zlib compression level (default 1) - :param kwargs: super class arguments - - """ - self.compress_level = compress_level - super().__init__(directory, **kwargs) - - def put(self, key): - json_bytes = json.dumps(key).encode('utf-8') - data = zlib.compress(json_bytes, self.compress_level) - return super().put(data) - - def get(self, key, raw): - data = super().get(key, raw) - return json.loads(zlib.decompress(data).decode('utf-8')) - - def store(self, value, read, key=UNKNOWN): - if not read: - json_bytes = json.dumps(value).encode('utf-8') - value = zlib.compress(json_bytes, self.compress_level) - return super().store(value, read, key=key) - - def fetch(self, mode, filename, value, read): - data = super().fetch(mode, filename, value, read) - if not read: - data = json.loads(zlib.decompress(data).decode('utf-8')) - return data - - -class Timeout(Exception): - """Database timeout expired.""" - - -class UnknownFileWarning(UserWarning): - """Warning used by Cache.check for unknown files.""" - - -class EmptyDirWarning(UserWarning): - """Warning used by Cache.check for empty directories.""" - - -def args_to_key(base, args, kwargs, typed, ignore): - """Create cache key out of function arguments. - - :param tuple base: base of key - :param tuple args: function arguments - :param dict kwargs: function keyword arguments - :param bool typed: include types in cache key - :param set ignore: positional or keyword args to ignore - :return: cache key tuple - - """ - args = tuple(arg for index, arg in enumerate(args) if index not in ignore) - key = base + args + (None,) - - if kwargs: - kwargs = {key: val for key, val in kwargs.items() if key not in ignore} - sorted_items = sorted(kwargs.items()) - - for item in sorted_items: - key += item - - if typed: - key += tuple(type(arg) for arg in args) - - if kwargs: - key += tuple(type(value) for _, value in sorted_items) - - return key - - -class Cache: - """Disk and file backed cache.""" - - def __init__(self, directory=None, timeout=60, disk=Disk, **settings): - """Initialize cache instance. - - :param str directory: cache directory - :param float timeout: SQLite connection timeout - :param disk: Disk type or subclass for serialization - :param settings: any of DEFAULT_SETTINGS - - """ - try: - assert issubclass(disk, Disk) - except (TypeError, AssertionError): - raise ValueError('disk must subclass diskcache.Disk') from None - - if directory is None: - directory = tempfile.mkdtemp(prefix='diskcache-') - directory = str(directory) - directory = op.expanduser(directory) - directory = op.expandvars(directory) - - self._directory = directory - self._timeout = 0 # Manually handle retries during initialization. - self._local = threading.local() - self._txn_id = None - - if not op.isdir(directory): - try: - os.makedirs(directory, 0o755) - except OSError as error: - if error.errno != errno.EEXIST: - raise EnvironmentError( - error.errno, - 'Cache directory "%s" does not exist' - ' and could not be created' % self._directory, - ) from None - - sql = self._sql_retry - - # Setup Settings table. - - try: - current_settings = dict( - sql('SELECT key, value FROM Settings').fetchall() - ) - except sqlite3.OperationalError: - current_settings = {} - - sets = DEFAULT_SETTINGS.copy() - sets.update(current_settings) - sets.update(settings) - - for key in METADATA: - sets.pop(key, None) - - # Chance to set pragmas before any tables are created. 
- - for key, value in sorted(sets.items()): - if key.startswith('sqlite_'): - self.reset(key, value, update=False) - - sql( - 'CREATE TABLE IF NOT EXISTS Settings (' - ' key TEXT NOT NULL UNIQUE,' - ' value)' - ) - - # Setup Disk object (must happen after settings initialized). - - kwargs = { - key[5:]: value - for key, value in sets.items() - if key.startswith('disk_') - } - self._disk = disk(directory, **kwargs) - - # Set cached attributes: updates settings and sets pragmas. - - for key, value in sets.items(): - query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key, value) - - for key, value in METADATA.items(): - query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' - sql(query, (key, value)) - self.reset(key) - - ((self._page_size,),) = sql('PRAGMA page_size').fetchall() - - # Setup Cache table. - - sql( - 'CREATE TABLE IF NOT EXISTS Cache (' - ' rowid INTEGER PRIMARY KEY,' - ' key BLOB,' - ' raw INTEGER,' - ' store_time REAL,' - ' expire_time REAL,' - ' access_time REAL,' - ' access_count INTEGER DEFAULT 0,' - ' tag BLOB,' - ' size INTEGER DEFAULT 0,' - ' mode INTEGER DEFAULT 0,' - ' filename TEXT,' - ' value BLOB)' - ) - - sql( - 'CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' - ' Cache(key, raw)' - ) - - sql( - 'CREATE INDEX IF NOT EXISTS Cache_expire_time ON' - ' Cache (expire_time)' - ) - - query = EVICTION_POLICY[self.eviction_policy]['init'] - - if query is not None: - sql(query) - - # Use triggers to keep Metadata updated. - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_count_insert' - ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value + 1' - ' WHERE key = "count"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_count_delete' - ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value - 1' - ' WHERE key = "count"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_insert' - ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value + NEW.size' - ' WHERE key = "size"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_update' - ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings' - ' SET value = value + NEW.size - OLD.size' - ' WHERE key = "size"; END' - ) - - sql( - 'CREATE TRIGGER IF NOT EXISTS Settings_size_delete' - ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' - ' UPDATE Settings SET value = value - OLD.size' - ' WHERE key = "size"; END' - ) - - # Create tag index if requested. - - if self.tag_index: # pylint: disable=no-member - self.create_tag_index() - else: - self.drop_tag_index() - - # Close and re-open database connection with given timeout. - - self.close() - self._timeout = timeout - self._sql # pylint: disable=pointless-statement - - @property - def directory(self): - """Cache directory.""" - return self._directory - - @property - def timeout(self): - """SQLite connection timeout value in seconds.""" - return self._timeout - - @property - def disk(self): - """Disk used for serialization.""" - return self._disk - - @property - def _con(self): - # Check process ID to support process forking. If the process - # ID changes, close the connection and update the process ID. 
- - local_pid = getattr(self._local, 'pid', None) - pid = os.getpid() - - if local_pid != pid: - self.close() - self._local.pid = pid - - con = getattr(self._local, 'con', None) - - if con is None: - con = self._local.con = sqlite3.connect( - op.join(self._directory, DBNAME), - timeout=self._timeout, - isolation_level=None, - ) - - # Some SQLite pragmas work on a per-connection basis so - # query the Settings table and reset the pragmas. The - # Settings table may not exist so catch and ignore the - # OperationalError that may occur. - - try: - select = 'SELECT key, value FROM Settings' - settings = con.execute(select).fetchall() - except sqlite3.OperationalError: - pass - else: - for key, value in settings: - if key.startswith('sqlite_'): - self.reset(key, value, update=False) - - return con - - @property - def _sql(self): - return self._con.execute - - @property - def _sql_retry(self): - sql = self._sql - - # 2018-11-01 GrantJ - Some SQLite builds/versions handle - # the SQLITE_BUSY return value and connection parameter - # "timeout" differently. For a more reliable duration, - # manually retry the statement for 60 seconds. Only used - # by statements which modify the database and do not use - # a transaction (like those in ``__init__`` or ``reset``). - # See Issue #85 for and tests/issue_85.py for more details. - - def _execute_with_retry(statement, *args, **kwargs): - start = time.time() - while True: - try: - return sql(statement, *args, **kwargs) - except sqlite3.OperationalError as exc: - if str(exc) != 'database is locked': - raise - diff = time.time() - start - if diff > 60: - raise - time.sleep(0.001) - - return _execute_with_retry - - @cl.contextmanager - def transact(self, retry=False): - """Context manager to perform a transaction by locking the cache. - - While the cache is locked, no other write operation is permitted. - Transactions should therefore be as short as possible. Read and write - operations performed in a transaction are atomic. Read operations may - occur concurrent to a transaction. - - Transactions may be nested and may not be shared between threads. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - >>> cache = Cache() - >>> with cache.transact(): # Atomically increment two keys. - ... _ = cache.incr('total', 123.4) - ... _ = cache.incr('count', 1) - >>> with cache.transact(): # Atomically calculate average. - ... 
average = cache['total'] / cache['count'] - >>> average - 123.4 - - :param bool retry: retry if database timeout occurs (default False) - :return: context manager for use in `with` statement - :raises Timeout: if database timeout occurs - - """ - with self._transact(retry=retry): - yield - - @cl.contextmanager - def _transact(self, retry=False, filename=None): - _acquireLock() - try: - sql = self._sql - filenames = [] - _disk_remove = self._disk.remove - tid = threading.get_ident() - txn_id = self._txn_id - - if tid == txn_id: - begin = False - else: - while True: - try: - sql('BEGIN IMMEDIATE') - begin = True - self._txn_id = tid - break - except sqlite3.OperationalError: - if retry: - continue - if filename is not None: - _disk_remove(filename) - raise Timeout from None - - try: - yield sql, filenames.append - except BaseException: - if begin: - assert self._txn_id == tid - self._txn_id = None - sql('ROLLBACK') - raise - else: - if begin: - assert self._txn_id == tid - self._txn_id = None - sql('COMMIT') - for name in filenames: - if name is not None: - _disk_remove(name) - finally: - _releaseLock() - - def set(self, key, value, expire=None, read=False, tag=None, retry=False): - """Set `key` and `value` item in cache. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param value: value for item - :param float expire: seconds until item expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was set - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read, key=key) - columns = (expire_time, tag, size, mode, filename, db_value) - - # The order of SELECT, UPDATE, and INSERT is important below. - # - # Typical cache usage pattern is: - # - # value = cache.get(key) - # if value is None: - # value = expensive_calculation() - # cache.set(key, value) - # - # Cache.get does not evict expired keys to avoid writes during lookups. - # Commonly used/expired keys will therefore remain in the cache making - # an UPDATE the preferred path. - # - # The alternative is to assume the key is not present by first trying - # to INSERT and then handling the IntegrityError that occurs from - # violating the UNIQUE constraint. This optimistic approach was - # rejected based on the common cache usage pattern. - # - # INSERT OR REPLACE aka UPSERT is not used because the old filename may - # need cleanup. - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_filename),) = rows - cleanup(old_filename) - self._row_update(rowid, now, columns) - else: - self._row_insert(db_key, raw, now, columns) - - self._cull(now, sql, cleanup) - - return True - - def __setitem__(self, key, value): - """Set corresponding `value` for `key` in cache. 
- - :param key: key for item - :param value: value for item - :return: corresponding value - :raises KeyError: if key is not found - - """ - self.set(key, value, retry=True) - - def _row_update(self, rowid, now, columns): - sql = self._sql - expire_time, tag, size, mode, filename, value = columns - sql( - 'UPDATE Cache SET' - ' store_time = ?,' - ' expire_time = ?,' - ' access_time = ?,' - ' access_count = ?,' - ' tag = ?,' - ' size = ?,' - ' mode = ?,' - ' filename = ?,' - ' value = ?' - ' WHERE rowid = ?', - ( - now, # store_time - expire_time, - now, # access_time - 0, # access_count - tag, - size, - mode, - filename, - value, - rowid, - ), - ) - - def _row_insert(self, key, raw, now, columns): - sql = self._sql - expire_time, tag, size, mode, filename, value = columns - sql( - 'INSERT INTO Cache(' - ' key, raw, store_time, expire_time, access_time,' - ' access_count, tag, size, mode, filename, value' - ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', - ( - key, - raw, - now, # store_time - expire_time, - now, # access_time - 0, # access_count - tag, - size, - mode, - filename, - value, - ), - ) - - def _cull(self, now, sql, cleanup, limit=None): - cull_limit = self.cull_limit if limit is None else limit - - if cull_limit == 0: - return - - # Evict expired keys. - - select_expired_template = ( - 'SELECT %s FROM Cache' - ' WHERE expire_time IS NOT NULL AND expire_time < ?' - ' ORDER BY expire_time LIMIT ?' - ) - - select_expired = select_expired_template % 'filename' - rows = sql(select_expired, (now, cull_limit)).fetchall() - - if rows: - delete_expired = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( - select_expired_template % 'rowid' - ) - sql(delete_expired, (now, cull_limit)) - - for (filename,) in rows: - cleanup(filename) - - cull_limit -= len(rows) - - if cull_limit == 0: - return - - # Evict keys by policy. - - select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] - - if select_policy is None or self.volume() < self.size_limit: - return - - select_filename = select_policy.format(fields='filename', now=now) - rows = sql(select_filename, (cull_limit,)).fetchall() - - if rows: - delete = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( - select_policy.format(fields='rowid', now=now) - ) - sql(delete, (cull_limit,)) - - for (filename,) in rows: - cleanup(filename) - - def touch(self, key, expire=None, retry=False): - """Touch `key` in cache and update `expire` time. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param float expire: seconds until item expires - (default None, no expiry) - :param bool retry: retry if database timeout occurs (default False) - :return: True if key was touched - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - - with self._transact(retry) as (sql, _): - rows = sql( - 'SELECT rowid, expire_time FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_expire_time),) = rows - - if old_expire_time is None or old_expire_time > now: - sql( - 'UPDATE Cache SET expire_time = ? WHERE rowid = ?', - (expire_time, rowid), - ) - return True - - return False - - def add(self, key, value, expire=None, read=False, tag=None, retry=False): - """Add `key` and `value` item to cache. - - Similar to `set`, but only add to cache if key not present. - - Operation is atomic. 
Only one concurrent add operation for a given key - will succeed. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param value: value for item - :param float expire: seconds until the key expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was added - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read, key=key) - columns = (expire_time, tag, size, mode, filename, db_value) - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename, expire_time FROM Cache' - ' WHERE key = ? AND raw = ?', - (db_key, raw), - ).fetchall() - - if rows: - ((rowid, old_filename, old_expire_time),) = rows - - if old_expire_time is None or old_expire_time > now: - cleanup(filename) - return False - - cleanup(old_filename) - self._row_update(rowid, now, columns) - else: - self._row_insert(db_key, raw, now, columns) - - self._cull(now, sql, cleanup) - - return True - - def incr(self, key, delta=1, default=0, retry=False): - """Increment value by delta for item with key. - - If key is missing and default is None then raise KeyError. Else if key - is missing and default is not None then use default for value. - - Operation is atomic. All concurrent increment operations will be - counted individually. - - Assumes value may be stored in a SQLite column. Most builds that target - machines with 64-bit pointer widths will support 64-bit signed - integers. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param int delta: amount to increment (default 1) - :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout occurs (default False) - :return: new value for item - :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout occurs - - """ - now = time.time() - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid, expire_time, filename, value FROM Cache' - ' WHERE key = ? AND raw = ?' - ) - - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (db_key, raw)).fetchall() - - if not rows: - if default is None: - raise KeyError(key) - - value = default + delta - columns = (None, None) + self._disk.store( - value, False, key=key - ) - self._row_insert(db_key, raw, now, columns) - self._cull(now, sql, cleanup) - return value - - ((rowid, expire_time, filename, value),) = rows - - if expire_time is not None and expire_time < now: - if default is None: - raise KeyError(key) - - value = default + delta - columns = (None, None) + self._disk.store( - value, False, key=key - ) - self._row_update(rowid, now, columns) - self._cull(now, sql, cleanup) - cleanup(filename) - return value - - value += delta - - columns = 'store_time = ?, value = ?' - update_column = EVICTION_POLICY[self.eviction_policy]['get'] - - if update_column is not None: - columns += ', ' + update_column.format(now=now) - - update = 'UPDATE Cache SET %s WHERE rowid = ?' 
% columns - sql(update, (now, value, rowid)) - - return value - - def decr(self, key, delta=1, default=0, retry=False): - """Decrement value by delta for item with key. - - If key is missing and default is None then raise KeyError. Else if key - is missing and default is not None then use default for value. - - Operation is atomic. All concurrent decrement operations will be - counted individually. - - Unlike Memcached, negative values are supported. Value may be - decremented below zero. - - Assumes value may be stored in a SQLite column. Most builds that target - machines with 64-bit pointer widths will support 64-bit signed - integers. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param int delta: amount to decrement (default 1) - :param int default: value if key is missing (default 0) - :param bool retry: retry if database timeout occurs (default False) - :return: new value for item - :raises KeyError: if key is not found and default is None - :raises Timeout: if database timeout occurs - - """ - return self.incr(key, -delta, default, retry) - - def get( - self, - key, - default=None, - read=False, - expire_time=False, - tag=False, - retry=False, - ): - """Retrieve value from cache. If `key` is missing, return `default`. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param default: value to return if key is missing (default None) - :param bool read: if True, return file handle to value - (default False) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: value for item or default if key not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - update_column = EVICTION_POLICY[self.eviction_policy]['get'] - select = ( - 'SELECT rowid, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - if expire_time and tag: - default = (default, None, None) - elif expire_time or tag: - default = (default, None) - - if not self.statistics and update_column is None: - # Fast path, no transaction necessary. - - rows = self._sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - return default - - ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows - - try: - value = self._disk.fetch(mode, filename, db_value, read) - except IOError: - # Key was deleted before we could retrieve result. - return default - - else: # Slow path, transaction required. - cache_hit = ( - 'UPDATE Settings SET value = value + 1 WHERE key = "hits"' - ) - cache_miss = ( - 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' - ) - - with self._transact(retry) as (sql, _): - rows = sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - if self.statistics: - sql(cache_miss) - return default - - ( - (rowid, db_expire_time, db_tag, mode, filename, db_value), - ) = rows # noqa: E127 - - try: - value = self._disk.fetch(mode, filename, db_value, read) - except IOError: - # Key was deleted before we could retrieve result. - if self.statistics: - sql(cache_miss) - return default - - if self.statistics: - sql(cache_hit) - - now = time.time() - update = 'UPDATE Cache SET %s WHERE rowid = ?' 
- - if update_column is not None: - sql(update % update_column.format(now=now), (rowid,)) - - if expire_time and tag: - return (value, db_expire_time, db_tag) - elif expire_time: - return (value, db_expire_time) - elif tag: - return (value, db_tag) - else: - return value - - def __getitem__(self, key): - """Return corresponding value for `key` from cache. - - :param key: key matching item - :return: corresponding value - :raises KeyError: if key is not found - - """ - value = self.get(key, default=ENOVAL, retry=True) - if value is ENOVAL: - raise KeyError(key) - return value - - def read(self, key, retry=False): - """Return file handle value corresponding to `key` from cache. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default False) - :return: file open for reading in binary mode - :raises KeyError: if key is not found - :raises Timeout: if database timeout occurs - - """ - handle = self.get(key, default=ENOVAL, read=True, retry=retry) - if handle is ENOVAL: - raise KeyError(key) - return handle - - def __contains__(self, key): - """Return `True` if `key` matching item is found in cache. - - :param key: key matching item - :return: True if key matching item - - """ - sql = self._sql - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid FROM Cache' - ' WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - rows = sql(select, (db_key, raw, time.time())).fetchall() - - return bool(rows) - - def pop( - self, key, default=None, expire_time=False, tag=False, retry=False - ): # noqa: E501 - """Remove corresponding item for `key` from cache and return value. - - If `key` is missing, return `default`. - - Operation is atomic. Concurrent operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key for item - :param default: value to return if key is missing (default None) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: value for item or default if key not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - select = ( - 'SELECT rowid, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)' - ) - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - with self._transact(retry) as (sql, _): - rows = sql(select, (db_key, raw, time.time())).fetchall() - - if not rows: - return default - - ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows - - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - try: - value = self._disk.fetch(mode, filename, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - return default - finally: - if filename is not None: - self._disk.remove(filename) - - if expire_time and tag: - return value, db_expire_time, db_tag - elif expire_time: - return value, db_expire_time - elif tag: - return value, db_tag - else: - return value - - def __delitem__(self, key, retry=True): - """Delete corresponding item for `key` from cache. 
- - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default `True`). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default True) - :raises KeyError: if key is not found - :raises Timeout: if database timeout occurs - - """ - db_key, raw = self._disk.put(key) - - with self._transact(retry) as (sql, cleanup): - rows = sql( - 'SELECT rowid, filename FROM Cache' - ' WHERE key = ? AND raw = ?' - ' AND (expire_time IS NULL OR expire_time > ?)', - (db_key, raw, time.time()), - ).fetchall() - - if not rows: - raise KeyError(key) - - ((rowid, filename),) = rows - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(filename) - - return True - - def delete(self, key, retry=False): - """Delete corresponding item for `key` from cache. - - Missing keys are ignored. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param key: key matching item - :param bool retry: retry if database timeout occurs (default False) - :return: True if item was deleted - :raises Timeout: if database timeout occurs - - """ - # pylint: disable=unnecessary-dunder-call - try: - return self.__delitem__(key, retry=retry) - except KeyError: - return False - - def push( - self, - value, - prefix=None, - side='back', - expire=None, - read=False, - tag=None, - retry=False, - ): - """Push `value` onto `side` of queue identified by `prefix` in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - Defaults to pushing value on back of queue. Set side to 'front' to push - value on front of queue. Side must be one of 'back' or 'front'. - - Operation is atomic. Concurrent operations will be serialized. - - When `read` is `True`, `value` should be a file-like object opened - for reading in binary mode. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.pull`. - - >>> cache = Cache() - >>> print(cache.push('first value')) - 500000000000000 - >>> cache.get(500000000000000) - 'first value' - >>> print(cache.push('second value')) - 500000000000001 - >>> print(cache.push('third value', side='front')) - 499999999999999 - >>> cache.push(1234, prefix='userids') - 'userids-500000000000000' - - :param value: value for item - :param str prefix: key prefix (default None, key is integer) - :param str side: either 'back' or 'front' (default 'back') - :param float expire: seconds until the key expires - (default None, no expiry) - :param bool read: read value as bytes from file (default False) - :param str tag: text to associate with key (default None) - :param bool retry: retry if database timeout occurs (default False) - :return: key for item in cache - :raises Timeout: if database timeout occurs - - """ - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - now = time.time() - raw = True - expire_time = None if expire is None else now + expire - size, mode, filename, db_value = self._disk.store(value, read) - columns = (expire_time, tag, size, mode, filename, db_value) - order = {'back': 'DESC', 'front': 'ASC'} - select = ( - 'SELECT key FROM Cache' - ' WHERE ? < key AND key < ? AND raw = ?' 
- ' ORDER BY key %s LIMIT 1' - ) % order[side] - - with self._transact(retry, filename) as (sql, cleanup): - rows = sql(select, (min_key, max_key, raw)).fetchall() - - if rows: - ((key,),) = rows - - if prefix is not None: - num = int(key[(key.rfind('-') + 1) :]) - else: - num = key - - if side == 'back': - num += 1 - else: - assert side == 'front' - num -= 1 - else: - num = 500000000000000 - - if prefix is not None: - db_key = '{0}-{1:015d}'.format(prefix, num) - else: - db_key = num - - self._row_insert(db_key, raw, now, columns) - self._cull(now, sql, cleanup) - - return db_key - - def pull( - self, - prefix=None, - default=(None, None), - side='front', - expire_time=False, - tag=False, - retry=False, - ): - """Pull key and value item pair from `side` of queue in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - If queue is empty, return default. - - Defaults to pulling key and value item pairs from front of queue. Set - side to 'back' to pull from back of queue. Side must be one of 'front' - or 'back'. - - Operation is atomic. Concurrent operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.push` and `Cache.get`. - - >>> cache = Cache() - >>> cache.pull() - (None, None) - >>> for letter in 'abc': - ... print(cache.push(letter)) - 500000000000000 - 500000000000001 - 500000000000002 - >>> key, value = cache.pull() - >>> print(key) - 500000000000000 - >>> value - 'a' - >>> _, value = cache.pull(side='back') - >>> value - 'c' - >>> cache.push(1234, 'userids') - 'userids-500000000000000' - >>> _, value = cache.pull('userids') - >>> value - 1234 - - :param str prefix: key prefix (default None, key is integer) - :param default: value to return if key is missing - (default (None, None)) - :param str side: either 'front' or 'back' (default 'front') - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair or default if queue is empty - :raises Timeout: if database timeout occurs - - """ - # Caution: Nearly identical code exists in Cache.peek - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - order = {'front': 'ASC', 'back': 'DESC'} - select = ( - 'SELECT rowid, key, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' - ' ORDER BY key %s LIMIT 1' - ) % order[side] - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (min_key, max_key)).fetchall() - - if not rows: - return default - - ( - (rowid, key, db_expire, db_tag, mode, name, db_value), - ) = rows - - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - if db_expire is not None and db_expire < time.time(): - cleanup(name) - else: - break - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. 
- continue - finally: - if name is not None: - self._disk.remove(name) - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def peek( - self, - prefix=None, - default=(None, None), - side='front', - expire_time=False, - tag=False, - retry=False, - ): - """Peek at key and value item pair from `side` of queue in cache. - - When prefix is None, integer keys are used. Otherwise, string keys are - used in the format "prefix-integer". Integer starts at 500 trillion. - - If queue is empty, return default. - - Defaults to peeking at key and value item pairs from front of queue. - Set side to 'back' to pull from back of queue. Side must be one of - 'front' or 'back'. - - Expired items are deleted from cache. Operation is atomic. Concurrent - operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - See also `Cache.pull` and `Cache.push`. - - >>> cache = Cache() - >>> for letter in 'abc': - ... print(cache.push(letter)) - 500000000000000 - 500000000000001 - 500000000000002 - >>> key, value = cache.peek() - >>> print(key) - 500000000000000 - >>> value - 'a' - >>> key, value = cache.peek(side='back') - >>> print(key) - 500000000000002 - >>> value - 'c' - - :param str prefix: key prefix (default None, key is integer) - :param default: value to return if key is missing - (default (None, None)) - :param str side: either 'front' or 'back' (default 'front') - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair or default if queue is empty - :raises Timeout: if database timeout occurs - - """ - # Caution: Nearly identical code exists in Cache.pull - if prefix is None: - min_key = 0 - max_key = 999999999999999 - else: - min_key = prefix + '-000000000000000' - max_key = prefix + '-999999999999999' - - order = {'front': 'ASC', 'back': 'DESC'} - select = ( - 'SELECT rowid, key, expire_time, tag, mode, filename, value' - ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' - ' ORDER BY key %s LIMIT 1' - ) % order[side] - - if expire_time and tag: - default = default, None, None - elif expire_time or tag: - default = default, None - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, (min_key, max_key)).fetchall() - - if not rows: - return default - - ( - (rowid, key, db_expire, db_tag, mode, name, db_value), - ) = rows - - if db_expire is not None and db_expire < time.time(): - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(name) - else: - break - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - continue - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def peekitem(self, last=True, expire_time=False, tag=False, retry=False): - """Peek at key and value item pair in cache based on iteration order. - - Expired items are deleted from cache. Operation is atomic. Concurrent - operations will be serialized. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). 
- - >>> cache = Cache() - >>> for num, letter in enumerate('abc'): - ... cache[letter] = num - >>> cache.peekitem() - ('c', 2) - >>> cache.peekitem(last=False) - ('a', 0) - - :param bool last: last item in iteration order (default True) - :param bool expire_time: if True, return expire_time in tuple - (default False) - :param bool tag: if True, return tag in tuple (default False) - :param bool retry: retry if database timeout occurs (default False) - :return: key and value item pair - :raises KeyError: if cache is empty - :raises Timeout: if database timeout occurs - - """ - order = ('ASC', 'DESC') - select = ( - 'SELECT rowid, key, raw, expire_time, tag, mode, filename, value' - ' FROM Cache ORDER BY rowid %s LIMIT 1' - ) % order[last] - - while True: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select).fetchall() - - if not rows: - raise KeyError('dictionary is empty') - - ( - ( - rowid, - db_key, - raw, - db_expire, - db_tag, - mode, - name, - db_value, - ), - ) = rows - - if db_expire is not None and db_expire < time.time(): - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - cleanup(name) - else: - break - - key = self._disk.get(db_key, raw) - - try: - value = self._disk.fetch(mode, name, db_value, False) - except IOError: - # Key was deleted before we could retrieve result. - continue - break - - if expire_time and tag: - return (key, value), db_expire, db_tag - elif expire_time: - return (key, value), db_expire - elif tag: - return (key, value), db_tag - else: - return key, value - - def memoize( - self, name=None, typed=False, expire=None, tag=None, ignore=() - ): - """Memoizing cache decorator. - - Decorator to wrap callable with memoizing function using cache. - Repeated calls with the same arguments will lookup result in cache and - avoid function evaluation. - - If name is set to None (default), the callable name will be determined - automatically. - - When expire is set to zero, function results will not be set in the - cache. Cache lookups still occur, however. Read - :doc:`case-study-landing-page-caching` for example usage. - - If typed is set to True, function arguments of different types will be - cached separately. For example, f(3) and f(3.0) will be treated as - distinct calls with distinct results. - - The original underlying function is accessible through the __wrapped__ - attribute. This is useful for introspection, for bypassing the cache, - or for rewrapping the function with a different cache. - - >>> from diskcache import Cache - >>> cache = Cache() - >>> @cache.memoize(expire=1, tag='fib') - ... def fibonacci(number): - ... if number == 0: - ... return 0 - ... elif number == 1: - ... return 1 - ... else: - ... return fibonacci(number - 1) + fibonacci(number - 2) - >>> print(fibonacci(100)) - 354224848179261915075 - - An additional `__cache_key__` attribute can be used to generate the - cache key used for the given arguments. - - >>> key = fibonacci.__cache_key__(100) - >>> print(cache[key]) - 354224848179261915075 - - Remember to call memoize when decorating a callable. If you forget, - then a TypeError will occur. Note the lack of parenthenses after - memoize below: - - >>> @cache.memoize - ... def test(): - ... pass - Traceback (most recent call last): - ... 
- TypeError: name cannot be callable - - :param cache: cache to store callable arguments and return values - :param str name: name given for callable (default None, automatic) - :param bool typed: cache different types separately (default False) - :param float expire: seconds until arguments expire - (default None, no expiry) - :param str tag: text to associate with arguments (default None) - :param set ignore: positional or keyword args to ignore (default ()) - :return: callable decorator - - """ - # Caution: Nearly identical code exists in DjangoCache.memoize - if callable(name): - raise TypeError('name cannot be callable') - - def decorator(func): - """Decorator created by memoize() for callable `func`.""" - base = (full_name(func),) if name is None else (name,) - - @ft.wraps(func) - def wrapper(*args, **kwargs): - """Wrapper for callable to cache arguments and return values.""" - key = wrapper.__cache_key__(*args, **kwargs) - result = self.get(key, default=ENOVAL, retry=True) - - if result is ENOVAL: - result = func(*args, **kwargs) - if expire is None or expire > 0: - self.set(key, result, expire, tag=tag, retry=True) - - return result - - def __cache_key__(*args, **kwargs): - """Make key for cache given function arguments.""" - return args_to_key(base, args, kwargs, typed, ignore) - - wrapper.__cache_key__ = __cache_key__ - return wrapper - - return decorator - - def check(self, fix=False, retry=False): - """Check database and file system consistency. - - Intended for use in testing and post-mortem error analysis. - - While checking the Cache table for consistency, a writer lock is held - on the database. The lock blocks other cache clients from writing to - the database. For caches with many file references, the lock may be - held for a long time. For example, local benchmarking shows that a - cache with 1,000 file references takes ~60ms to check. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool fix: correct inconsistencies - :param bool retry: retry if database timeout occurs (default False) - :return: list of warnings - :raises Timeout: if database timeout occurs - - """ - # pylint: disable=access-member-before-definition,W0201 - with warnings.catch_warnings(record=True) as warns: - sql = self._sql - - # Check integrity of database. - - rows = sql('PRAGMA integrity_check').fetchall() - - if len(rows) != 1 or rows[0][0] != 'ok': - for (message,) in rows: - warnings.warn(message) - - if fix: - sql('VACUUM') - - with self._transact(retry) as (sql, _): - - # Check Cache.filename against file system. - - filenames = set() - select = ( - 'SELECT rowid, size, filename FROM Cache' - ' WHERE filename IS NOT NULL' - ) - - rows = sql(select).fetchall() - - for rowid, size, filename in rows: - full_path = op.join(self._directory, filename) - filenames.add(full_path) - - if op.exists(full_path): - real_size = op.getsize(full_path) - - if size != real_size: - message = 'wrong file size: %s, %d != %d' - args = full_path, real_size, size - warnings.warn(message % args) - - if fix: - sql( - 'UPDATE Cache SET size = ?' - ' WHERE rowid = ?', - (real_size, rowid), - ) - - continue - - warnings.warn('file not found: %s' % full_path) - - if fix: - sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) - - # Check file system against Cache.filename. 
- - for dirpath, _, files in os.walk(self._directory): - paths = [op.join(dirpath, filename) for filename in files] - error = set(paths) - filenames - - for full_path in error: - if DBNAME in full_path: - continue - - message = 'unknown file: %s' % full_path - warnings.warn(message, UnknownFileWarning) - - if fix: - os.remove(full_path) - - # Check for empty directories. - - for dirpath, dirs, files in os.walk(self._directory): - if not (dirs or files): - message = 'empty directory: %s' % dirpath - warnings.warn(message, EmptyDirWarning) - - if fix: - os.rmdir(dirpath) - - # Check Settings.count against count of Cache rows. - - self.reset('count') - ((count,),) = sql('SELECT COUNT(key) FROM Cache').fetchall() - - if self.count != count: - message = 'Settings.count != COUNT(Cache.key); %d != %d' - warnings.warn(message % (self.count, count)) - - if fix: - sql( - 'UPDATE Settings SET value = ? WHERE key = ?', - (count, 'count'), - ) - - # Check Settings.size against sum of Cache.size column. - - self.reset('size') - select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' - ((size,),) = sql(select_size).fetchall() - - if self.size != size: - message = 'Settings.size != SUM(Cache.size); %d != %d' - warnings.warn(message % (self.size, size)) - - if fix: - sql( - 'UPDATE Settings SET value = ? WHERE key =?', - (size, 'size'), - ) - - return warns - - def create_tag_index(self): - """Create tag index on cache database. - - It is better to initialize cache with `tag_index=True` than use this. - - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') - self.reset('tag_index', 1) - - def drop_tag_index(self): - """Drop tag index on cache database. - - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql('DROP INDEX IF EXISTS Cache_tag_rowid') - self.reset('tag_index', 0) - - def evict(self, tag, retry=False): - """Remove items with matching `tag` from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param str tag: tag identifying items - :param bool retry: retry if database timeout occurs (default False) - :return: count of rows removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, filename FROM Cache' - ' WHERE tag = ? AND rowid > ?' - ' ORDER BY rowid LIMIT ?' - ) - args = [tag, 0, 100] - return self._select_delete(select, args, arg_index=1, retry=retry) - - def expire(self, now=None, retry=False): - """Remove expired items from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). 
- - :param float now: current time (default None, ``time.time()`` used) - :param bool retry: retry if database timeout occurs (default False) - :return: count of items removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, expire_time, filename FROM Cache' - ' WHERE ? < expire_time AND expire_time < ?' - ' ORDER BY expire_time LIMIT ?' - ) - args = [0, now or time.time(), 100] - return self._select_delete(select, args, row_index=1, retry=retry) - - def cull(self, retry=False): - """Cull items from cache until volume is less than size limit. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool retry: retry if database timeout occurs (default False) - :return: count of items removed - :raises Timeout: if database timeout occurs - - """ - now = time.time() - - # Remove expired items. - - count = self.expire(now) - - # Remove items by policy. - - select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] - - if select_policy is None: - return 0 - - select_filename = select_policy.format(fields='filename', now=now) - - try: - while self.volume() > self.size_limit: - with self._transact(retry) as (sql, cleanup): - rows = sql(select_filename, (10,)).fetchall() - - if not rows: - break - - count += len(rows) - delete = ( - 'DELETE FROM Cache WHERE rowid IN (%s)' - % select_policy.format(fields='rowid', now=now) - ) - sql(delete, (10,)) - - for (filename,) in rows: - cleanup(filename) - except Timeout: - raise Timeout(count) from None - - return count - - def clear(self, retry=False): - """Remove all items from cache. - - Removing items is an iterative process. In each iteration, a subset of - items is removed. Concurrent writes may occur between iterations. - - If a :exc:`Timeout` occurs, the first element of the exception's - `args` attribute will be the number of items removed before the - exception occurred. - - Raises :exc:`Timeout` error when database timeout occurs and `retry` is - `False` (default). - - :param bool retry: retry if database timeout occurs (default False) - :return: count of rows removed - :raises Timeout: if database timeout occurs - - """ - select = ( - 'SELECT rowid, filename FROM Cache' - ' WHERE rowid > ?' - ' ORDER BY rowid LIMIT ?' - ) - args = [0, 100] - return self._select_delete(select, args, retry=retry) - - def _select_delete( - self, select, args, row_index=0, arg_index=0, retry=False - ): - count = 0 - delete = 'DELETE FROM Cache WHERE rowid IN (%s)' - - try: - while True: - with self._transact(retry) as (sql, cleanup): - rows = sql(select, args).fetchall() - - if not rows: - break - - count += len(rows) - sql(delete % ','.join(str(row[0]) for row in rows)) - - for row in rows: - args[arg_index] = row[row_index] - cleanup(row[-1]) - - except Timeout: - raise Timeout(count) from None - - return count - - def iterkeys(self, reverse=False): - """Iterate Cache keys in database sort order. - - >>> cache = Cache() - >>> for key in [4, 1, 3, 0, 2]: - ... 
cache[key] = key - >>> list(cache.iterkeys()) - [0, 1, 2, 3, 4] - >>> list(cache.iterkeys(reverse=True)) - [4, 3, 2, 1, 0] - - :param bool reverse: reverse sort order (default False) - :return: iterator of Cache keys - - """ - sql = self._sql - limit = 100 - _disk_get = self._disk.get - - if reverse: - select = ( - 'SELECT key, raw FROM Cache' - ' ORDER BY key DESC, raw DESC LIMIT 1' - ) - iterate = ( - 'SELECT key, raw FROM Cache' - ' WHERE key = ? AND raw < ? OR key < ?' - ' ORDER BY key DESC, raw DESC LIMIT ?' - ) - else: - select = ( - 'SELECT key, raw FROM Cache' - ' ORDER BY key ASC, raw ASC LIMIT 1' - ) - iterate = ( - 'SELECT key, raw FROM Cache' - ' WHERE key = ? AND raw > ? OR key > ?' - ' ORDER BY key ASC, raw ASC LIMIT ?' - ) - - row = sql(select).fetchall() - - if row: - ((key, raw),) = row - else: - return - - yield _disk_get(key, raw) - - while True: - rows = sql(iterate, (key, raw, key, limit)).fetchall() - - if not rows: - break - - for key, raw in rows: - yield _disk_get(key, raw) - - def _iter(self, ascending=True): - sql = self._sql - rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() - ((max_rowid,),) = rows - yield # Signal ready. - - if max_rowid is None: - return - - bound = max_rowid + 1 - limit = 100 - _disk_get = self._disk.get - rowid = 0 if ascending else bound - select = ( - 'SELECT rowid, key, raw FROM Cache' - ' WHERE ? < rowid AND rowid < ?' - ' ORDER BY rowid %s LIMIT ?' - ) % ('ASC' if ascending else 'DESC') - - while True: - if ascending: - args = (rowid, bound, limit) - else: - args = (0, rowid, limit) - - rows = sql(select, args).fetchall() - - if not rows: - break - - for rowid, key, raw in rows: - yield _disk_get(key, raw) - - def __iter__(self): - """Iterate keys in cache including expired items.""" - iterator = self._iter() - next(iterator) - return iterator - - def __reversed__(self): - """Reverse iterate keys in cache including expired items.""" - iterator = self._iter(ascending=False) - next(iterator) - return iterator - - def stats(self, enable=True, reset=False): - """Return cache statistics hits and misses. - - :param bool enable: enable collecting statistics (default True) - :param bool reset: reset hits and misses to 0 (default False) - :return: (hits, misses) - - """ - # pylint: disable=E0203,W0201 - result = (self.reset('hits'), self.reset('misses')) - - if reset: - self.reset('hits', 0) - self.reset('misses', 0) - - self.reset('statistics', enable) - - return result - - def volume(self): - """Return estimated total size of cache on disk. - - :return: size in bytes - - """ - ((page_count,),) = self._sql('PRAGMA page_count').fetchall() - total_size = self._page_size * page_count + self.reset('size') - return total_size - - def close(self): - """Close database connection.""" - con = getattr(self._local, 'con', None) - - if con is None: - return - - con.close() - - try: - delattr(self._local, 'con') - except AttributeError: - pass - - def __enter__(self): - # Create connection in thread. - # pylint: disable=unused-variable - connection = self._con # noqa - return self - - def __exit__(self, *exception): - self.close() - - def __len__(self): - """Count of items in cache including expired items.""" - return self.reset('count') - - def __getstate__(self): - return (self.directory, self.timeout, type(self.disk)) - - def __setstate__(self, state): - self.__init__(*state) - - def reset(self, key, value=ENOVAL, update=True): - """Reset `key` and `value` item from Settings table. - - Use `reset` to update the value of Cache settings correctly. 
Cache - settings are stored in the Settings table of the SQLite database. If - `update` is ``False`` then no attempt is made to update the database. - - If `value` is not given, it is reloaded from the Settings - table. Otherwise, the Settings table is updated. - - Settings with the ``disk_`` prefix correspond to Disk - attributes. Updating the value will change the unprefixed attribute on - the associated Disk instance. - - Settings with the ``sqlite_`` prefix correspond to SQLite - pragmas. Updating the value will execute the corresponding PRAGMA - statement. - - SQLite PRAGMA statements may be executed before the Settings table - exists in the database by setting `update` to ``False``. - - :param str key: Settings key for item - :param value: value for item (optional) - :param bool update: update database Settings table (default True) - :return: updated value for item - :raises Timeout: if database timeout occurs - - """ - sql = self._sql - sql_retry = self._sql_retry - - if value is ENOVAL: - select = 'SELECT value FROM Settings WHERE key = ?' - ((value,),) = sql_retry(select, (key,)).fetchall() - setattr(self, key, value) - return value - - if update: - statement = 'UPDATE Settings SET value = ? WHERE key = ?' - sql_retry(statement, (value, key)) - - if key.startswith('sqlite_'): - pragma = key[7:] - - # 2016-02-17 GrantJ - PRAGMA and isolation_level=None - # don't always play nicely together. Retry setting the - # PRAGMA. I think some PRAGMA statements expect to - # immediately take an EXCLUSIVE lock on the database. I - # can't find any documentation for this but without the - # retry, stress will intermittently fail with multiple - # processes. - - # 2018-11-05 GrantJ - Avoid setting pragma values that - # are already set. Pragma settings like auto_vacuum and - # journal_mode can take a long time or may not work after - # tables have been created. - - start = time.time() - while True: - try: - try: - ((old_value,),) = sql( - 'PRAGMA %s' % (pragma) - ).fetchall() - update = old_value != value - except ValueError: - update = True - if update: - sql('PRAGMA %s = %s' % (pragma, value)).fetchall() - break - except sqlite3.OperationalError as exc: - if str(exc) != 'database is locked': - raise - diff = time.time() - start - if diff > 60: - raise - time.sleep(0.001) - elif key.startswith('disk_'): - attr = key[5:] - setattr(self._disk, attr, value) - - setattr(self, key, value) - return value - -if hasattr(os, 'register_at_fork'): - _lock = threading.RLock() - - def _acquireLock(): - global _lock - if _lock: - try: - _lock.acquire() - except BaseException: - _lock.release() - raise - - def _releaseLock(): - global _lock - if _lock: - _lock.release() - - def _after_at_fork_child_reinit_locks(): - global _lock - _lock = threading.RLock() - - os.register_at_fork(before=_acquireLock, - after_in_child=_after_at_fork_child_reinit_locks, - after_in_parent=_releaseLock) -else: - def _acquireLock(): - pass - - def _releaseLock(): - pass +"""Core disk and file backed cache API. +""" + +import codecs +import contextlib as cl +import errno +import functools as ft +import io +import json +import os +import os.path as op +import pickle +import pickletools +import sqlite3 +import struct +import tempfile +import threading +import time +import warnings +import zlib + + +def full_name(func): + """Return full name of `func` by adding the module and function name.""" + return func.__module__ + '.' 
+ func.__qualname__ + + +class Constant(tuple): + """Pretty display of immutable constant.""" + + def __new__(cls, name): + return tuple.__new__(cls, (name,)) + + def __repr__(self): + return '%s' % self[0] + + +DBNAME = 'cache.db' +ENOVAL = Constant('ENOVAL') +UNKNOWN = Constant('UNKNOWN') + +MODE_NONE = 0 +MODE_RAW = 1 +MODE_BINARY = 2 +MODE_TEXT = 3 +MODE_PICKLE = 4 + +DEFAULT_SETTINGS = { + 'statistics': 0, # False + 'tag_index': 0, # False + 'eviction_policy': 'least-recently-stored', + 'size_limit': 2**30, # 1gb + 'cull_limit': 10, + 'sqlite_auto_vacuum': 1, # FULL + 'sqlite_cache_size': 2**13, # 8,192 pages + 'sqlite_journal_mode': 'wal', + 'sqlite_mmap_size': 2**26, # 64mb + 'sqlite_synchronous': 1, # NORMAL + 'disk_min_file_size': 2**15, # 32kb + 'disk_pickle_protocol': pickle.HIGHEST_PROTOCOL, +} + +METADATA = { + 'count': 0, + 'size': 0, + 'hits': 0, + 'misses': 0, +} + +EVICTION_POLICY = { + 'none': { + 'init': None, + 'get': None, + 'cull': None, + }, + 'least-recently-stored': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_store_time ON' + ' Cache (store_time)' + ), + 'get': None, + 'cull': 'SELECT {fields} FROM Cache ORDER BY store_time LIMIT ?', + }, + 'least-recently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_time ON' + ' Cache (access_time)' + ), + 'get': 'access_time = {now}', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_time LIMIT ?', + }, + 'least-frequently-used': { + 'init': ( + 'CREATE INDEX IF NOT EXISTS Cache_access_count ON' + ' Cache (access_count)' + ), + 'get': 'access_count = access_count + 1', + 'cull': 'SELECT {fields} FROM Cache ORDER BY access_count LIMIT ?', + }, +} + + +class Disk: + """Cache key and value serialization for SQLite database and files.""" + + def __init__(self, directory, min_file_size=0, pickle_protocol=0): + """Initialize disk instance. + + :param str directory: directory path + :param int min_file_size: minimum size for file use + :param int pickle_protocol: pickle protocol for serialization + + """ + self._directory = directory + self.min_file_size = min_file_size + self.pickle_protocol = pickle_protocol + + def hash(self, key): + """Compute portable hash for `key`. + + :param key: key to hash + :return: hash value + + """ + mask = 0xFFFFFFFF + disk_key, _ = self.put(key) + type_disk_key = type(disk_key) + + if type_disk_key is sqlite3.Binary: + return zlib.adler32(disk_key) & mask + elif type_disk_key is str: + return zlib.adler32(disk_key.encode('utf-8')) & mask # noqa + elif type_disk_key is int: + return disk_key % mask + else: + assert type_disk_key is float + return zlib.adler32(struct.pack('!d', disk_key)) & mask + + def put(self, key): + """Convert `key` to fields key and raw for Cache table. + + :param key: key to convert + :return: (database key, raw boolean) pair + + """ + # pylint: disable=unidiomatic-typecheck + type_key = type(key) + + if type_key is bytes: + return sqlite3.Binary(key), True + elif ( + (type_key is str) + or ( + type_key is int + and -9223372036854775808 <= key <= 9223372036854775807 + ) + or (type_key is float) + ): + return key, True + else: + data = pickle.dumps(key, protocol=self.pickle_protocol) + result = pickletools.optimize(data) + return sqlite3.Binary(result), False + + def get(self, key, raw): + """Convert fields `key` and `raw` from Cache table to key. 
+ + :param key: database key to convert + :param bool raw: flag indicating raw database storage + :return: corresponding Python key + + """ + # pylint: disable=unidiomatic-typecheck + if raw: + return bytes(key) if type(key) is sqlite3.Binary else key + else: + return pickle.load(io.BytesIO(key)) + + def store(self, value, read, key=UNKNOWN): + """Convert `value` to fields size, mode, filename, and value for Cache + table. + + :param value: value to convert + :param bool read: True when value is file-like object + :param key: key for item (default UNKNOWN) + :return: (size, mode, filename, value) tuple for Cache table + + """ + # pylint: disable=unidiomatic-typecheck + type_value = type(value) + min_file_size = self.min_file_size + + if ( + (type_value is str and len(value) < min_file_size) + or ( + type_value is int + and -9223372036854775808 <= value <= 9223372036854775807 + ) + or (type_value is float) + ): + return 0, MODE_RAW, None, value + elif type_value is bytes: + if len(value) < min_file_size: + return 0, MODE_RAW, None, sqlite3.Binary(value) + else: + filename, full_path = self.filename(key, value) + self._write(full_path, io.BytesIO(value), 'xb') + return len(value), MODE_BINARY, filename, None + elif type_value is str: + filename, full_path = self.filename(key, value) + self._write(full_path, io.StringIO(value), 'x', 'UTF-8') + size = op.getsize(full_path) + return size, MODE_TEXT, filename, None + elif read: + reader = ft.partial(value.read, 2**22) + filename, full_path = self.filename(key, value) + iterator = iter(reader, b'') + size = self._write(full_path, iterator, 'xb') + return size, MODE_BINARY, filename, None + else: + result = pickle.dumps(value, protocol=self.pickle_protocol) + + if len(result) < min_file_size: + return 0, MODE_PICKLE, None, sqlite3.Binary(result) + else: + filename, full_path = self.filename(key, value) + self._write(full_path, io.BytesIO(result), 'xb') + return len(result), MODE_PICKLE, filename, None + + def _write(self, full_path, iterator, mode, encoding=None): + full_dir, _ = op.split(full_path) + + for count in range(1, 11): + with cl.suppress(OSError): + os.makedirs(full_dir) + + try: + # Another cache may have deleted the directory before + # the file could be opened. + writer = open(full_path, mode, encoding=encoding) + except OSError: + if count == 10: + # Give up after 10 tries to open the file. + raise + continue + + with writer: + size = 0 + for chunk in iterator: + size += len(chunk) + writer.write(chunk) + return size + + def fetch(self, mode, filename, value, read): + """Convert fields `mode`, `filename`, and `value` from Cache table to + value. 
+ + :param int mode: value mode raw, binary, text, or pickle + :param str filename: filename of corresponding value + :param value: database value + :param bool read: when True, return an open file handle + :return: corresponding Python value + :raises: IOError if the value cannot be read + + """ + # pylint: disable=unidiomatic-typecheck,consider-using-with + if mode == MODE_RAW: + return bytes(value) if type(value) is sqlite3.Binary else value + elif mode == MODE_BINARY: + if read: + return open(op.join(self._directory, filename), 'rb') + else: + with open(op.join(self._directory, filename), 'rb') as reader: + return reader.read() + elif mode == MODE_TEXT: + full_path = op.join(self._directory, filename) + with open(full_path, 'r', encoding='UTF-8') as reader: + return reader.read() + elif mode == MODE_PICKLE: + if value is None: + with open(op.join(self._directory, filename), 'rb') as reader: + return pickle.load(reader) + else: + return pickle.load(io.BytesIO(value)) + + def filename(self, key=UNKNOWN, value=UNKNOWN): + """Return filename and full-path tuple for file storage. + + Filename will be a randomly generated 28 character hexadecimal string + with ".val" suffixed. Two levels of sub-directories will be used to + reduce the size of directories. On older filesystems, lookups in + directories with many files may be slow. + + The default implementation ignores the `key` and `value` parameters. + + In some scenarios, for example :meth:`Cache.push + `, the `key` or `value` may not be known when the + item is stored in the cache. + + :param key: key for item (default UNKNOWN) + :param value: value for item (default UNKNOWN) + + """ + # pylint: disable=unused-argument + hex_name = codecs.encode(os.urandom(16), 'hex').decode('utf-8') + sub_dir = op.join(hex_name[:2], hex_name[2:4]) + name = hex_name[4:] + '.val' + filename = op.join(sub_dir, name) + full_path = op.join(self._directory, filename) + return filename, full_path + + def remove(self, file_path): + """Remove a file given by `file_path`. + + This method is cross-thread and cross-process safe. If an OSError + occurs, it is suppressed. + + :param str file_path: relative path to file + + """ + full_path = op.join(self._directory, file_path) + full_dir, _ = op.split(full_path) + + # Suppress OSError that may occur if two caches attempt to delete the + # same file or directory at the same time. + + with cl.suppress(OSError): + os.remove(full_path) + + with cl.suppress(OSError): + os.removedirs(full_dir) + + +class JSONDisk(Disk): + """Cache key and value using JSON serialization with zlib compression.""" + + def __init__(self, directory, compress_level=1, **kwargs): + """Initialize JSON disk instance. + + Keys and values are compressed using the zlib library. The + `compress_level` is an integer from 0 to 9 controlling the level of + compression; 1 is fastest and produces the least compression, 9 is + slowest and produces the most compression, and 0 is no compression. 
+ + :param str directory: directory path + :param int compress_level: zlib compression level (default 1) + :param kwargs: super class arguments + + """ + self.compress_level = compress_level + super().__init__(directory, **kwargs) + + def put(self, key): + json_bytes = json.dumps(key).encode('utf-8') + data = zlib.compress(json_bytes, self.compress_level) + return super().put(data) + + def get(self, key, raw): + data = super().get(key, raw) + return json.loads(zlib.decompress(data).decode('utf-8')) + + def store(self, value, read, key=UNKNOWN): + if not read: + json_bytes = json.dumps(value).encode('utf-8') + value = zlib.compress(json_bytes, self.compress_level) + return super().store(value, read, key=key) + + def fetch(self, mode, filename, value, read): + data = super().fetch(mode, filename, value, read) + if not read: + data = json.loads(zlib.decompress(data).decode('utf-8')) + return data + + +class Timeout(Exception): + """Database timeout expired.""" + + +class UnknownFileWarning(UserWarning): + """Warning used by Cache.check for unknown files.""" + + +class EmptyDirWarning(UserWarning): + """Warning used by Cache.check for empty directories.""" + + +def args_to_key(base, args, kwargs, typed, ignore): + """Create cache key out of function arguments. + + :param tuple base: base of key + :param tuple args: function arguments + :param dict kwargs: function keyword arguments + :param bool typed: include types in cache key + :param set ignore: positional or keyword args to ignore + :return: cache key tuple + + """ + args = tuple(arg for index, arg in enumerate(args) if index not in ignore) + key = base + args + (None,) + + if kwargs: + kwargs = {key: val for key, val in kwargs.items() if key not in ignore} + sorted_items = sorted(kwargs.items()) + + for item in sorted_items: + key += item + + if typed: + key += tuple(type(arg) for arg in args) + + if kwargs: + key += tuple(type(value) for _, value in sorted_items) + + return key + + +class Cache: + """Disk and file backed cache.""" + + def __init__(self, directory=None, timeout=60, disk=Disk, **settings): + """Initialize cache instance. + + :param str directory: cache directory + :param float timeout: SQLite connection timeout + :param disk: Disk type or subclass for serialization + :param settings: any of DEFAULT_SETTINGS + + """ + try: + assert issubclass(disk, Disk) + except (TypeError, AssertionError): + raise ValueError('disk must subclass diskcache.Disk') from None + + if directory is None: + directory = tempfile.mkdtemp(prefix='diskcache-') + directory = str(directory) + directory = op.expanduser(directory) + directory = op.expandvars(directory) + + self._directory = directory + self._timeout = 0 # Manually handle retries during initialization. + self._local = threading.local() + self._txn_id = None + + if not op.isdir(directory): + try: + os.makedirs(directory, 0o755) + except OSError as error: + if error.errno != errno.EEXIST: + raise EnvironmentError( + error.errno, + 'Cache directory "%s" does not exist' + ' and could not be created' % self._directory, + ) from None + + sql = self._sql_retry + + # Setup Settings table. + + try: + current_settings = dict( + sql('SELECT key, value FROM Settings').fetchall() + ) + except sqlite3.OperationalError: + current_settings = {} + + sets = DEFAULT_SETTINGS.copy() + sets.update(current_settings) + sets.update(settings) + + for key in METADATA: + sets.pop(key, None) + + # Chance to set pragmas before any tables are created. 
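+        # (Pragmas such as auto_vacuum and journal_mode may be slow or have
+        # no effect once tables exist, so the sqlite_* settings are applied
+        # before any CREATE TABLE statement runs.)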
+ + for key, value in sorted(sets.items()): + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + sql( + 'CREATE TABLE IF NOT EXISTS Settings (' + ' key TEXT NOT NULL UNIQUE,' + ' value)' + ) + + # Setup Disk object (must happen after settings initialized). + + kwargs = { + key[5:]: value + for key, value in sets.items() + if key.startswith('disk_') + } + self._disk = disk(directory, **kwargs) + + # Set cached attributes: updates settings and sets pragmas. + + for key, value in sets.items(): + query = 'INSERT OR REPLACE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key, value) + + for key, value in METADATA.items(): + query = 'INSERT OR IGNORE INTO Settings VALUES (?, ?)' + sql(query, (key, value)) + self.reset(key) + + ((self._page_size,),) = sql('PRAGMA page_size').fetchall() + + # Setup Cache table. + + sql( + 'CREATE TABLE IF NOT EXISTS Cache (' + ' rowid INTEGER PRIMARY KEY,' + ' key BLOB,' + ' raw INTEGER,' + ' store_time REAL,' + ' expire_time REAL,' + ' access_time REAL,' + ' access_count INTEGER DEFAULT 0,' + ' tag BLOB,' + ' size INTEGER DEFAULT 0,' + ' mode INTEGER DEFAULT 0,' + ' filename TEXT,' + ' value BLOB)' + ) + + sql( + 'CREATE UNIQUE INDEX IF NOT EXISTS Cache_key_raw ON' + ' Cache(key, raw)' + ) + + sql( + 'CREATE INDEX IF NOT EXISTS Cache_expire_time ON' + ' Cache (expire_time)' + ) + + query = EVICTION_POLICY[self.eviction_policy]['init'] + + if query is not None: + sql(query) + + # Use triggers to keep Metadata updated. + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + 1' + ' WHERE key = "count"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_count_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - 1' + ' WHERE key = "count"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_insert' + ' AFTER INSERT ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value + NEW.size' + ' WHERE key = "size"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_update' + ' AFTER UPDATE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings' + ' SET value = value + NEW.size - OLD.size' + ' WHERE key = "size"; END' + ) + + sql( + 'CREATE TRIGGER IF NOT EXISTS Settings_size_delete' + ' AFTER DELETE ON Cache FOR EACH ROW BEGIN' + ' UPDATE Settings SET value = value - OLD.size' + ' WHERE key = "size"; END' + ) + + # Create tag index if requested. + + if self.tag_index: # pylint: disable=no-member + self.create_tag_index() + else: + self.drop_tag_index() + + # Close and re-open database connection with given timeout. + + self.close() + self._timeout = timeout + self._sql # pylint: disable=pointless-statement + + @property + def directory(self): + """Cache directory.""" + return self._directory + + @property + def timeout(self): + """SQLite connection timeout value in seconds.""" + return self._timeout + + @property + def disk(self): + """Disk used for serialization.""" + return self._disk + + @property + def _con(self): + # Check process ID to support process forking. If the process + # ID changes, close the connection and update the process ID. 
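+        # A connection inherited across fork() may still hold the parent's
+        # transaction lock, so the child discards it and opens its own.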
+ + local_pid = getattr(self._local, 'pid', None) + pid = os.getpid() + + if local_pid != pid: + self.close() + self._local.pid = pid + + con = getattr(self._local, 'con', None) + + if con is None: + con = self._local.con = sqlite3.connect( + op.join(self._directory, DBNAME), + timeout=self._timeout, + isolation_level=None, + ) + + # Some SQLite pragmas work on a per-connection basis so + # query the Settings table and reset the pragmas. The + # Settings table may not exist so catch and ignore the + # OperationalError that may occur. + + try: + select = 'SELECT key, value FROM Settings' + settings = con.execute(select).fetchall() + except sqlite3.OperationalError: + pass + else: + for key, value in settings: + if key.startswith('sqlite_'): + self.reset(key, value, update=False) + + return con + + @property + def _sql(self): + return self._con.execute + + @property + def _sql_retry(self): + sql = self._sql + + # 2018-11-01 GrantJ - Some SQLite builds/versions handle + # the SQLITE_BUSY return value and connection parameter + # "timeout" differently. For a more reliable duration, + # manually retry the statement for 60 seconds. Only used + # by statements which modify the database and do not use + # a transaction (like those in ``__init__`` or ``reset``). + # See Issue #85 for and tests/issue_85.py for more details. + + def _execute_with_retry(statement, *args, **kwargs): + start = time.time() + while True: + try: + return sql(statement, *args, **kwargs) + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + + return _execute_with_retry + + @cl.contextmanager + def transact(self, retry=False): + """Context manager to perform a transaction by locking the cache. + + While the cache is locked, no other write operation is permitted. + Transactions should therefore be as short as possible. Read and write + operations performed in a transaction are atomic. Read operations may + occur concurrent to a transaction. + + Transactions may be nested and may not be shared between threads. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + >>> cache = Cache() + >>> with cache.transact(): # Atomically increment two keys. + ... _ = cache.incr('total', 123.4) + ... _ = cache.incr('count', 1) + >>> with cache.transact(): # Atomically calculate average. + ... 
average = cache['total'] / cache['count'] + >>> average + 123.4 + + :param bool retry: retry if database timeout occurs (default False) + :return: context manager for use in `with` statement + :raises Timeout: if database timeout occurs + + """ + with self._transact(retry=retry): + yield + + @cl.contextmanager + def _transact(self, retry=False, filename=None): + _acquireLock() + try: + sql = self._sql + filenames = [] + _disk_remove = self._disk.remove + tid = threading.get_ident() + txn_id = self._txn_id + + if tid == txn_id: + begin = False + else: + while True: + try: + sql('BEGIN IMMEDIATE') + begin = True + self._txn_id = tid + break + except sqlite3.OperationalError: + if retry: + continue + if filename is not None: + _disk_remove(filename) + raise Timeout from None + + try: + yield sql, filenames.append + except BaseException: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('ROLLBACK') + raise + else: + if begin: + assert self._txn_id == tid + self._txn_id = None + sql('COMMIT') + for name in filenames: + if name is not None: + _disk_remove(name) + finally: + _releaseLock() + + def set(self, key, value, expire=None, read=False, tag=None, retry=False): + """Set `key` and `value` item in cache. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was set + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + # The order of SELECT, UPDATE, and INSERT is important below. + # + # Typical cache usage pattern is: + # + # value = cache.get(key) + # if value is None: + # value = expensive_calculation() + # cache.set(key, value) + # + # Cache.get does not evict expired keys to avoid writes during lookups. + # Commonly used/expired keys will therefore remain in the cache making + # an UPDATE the preferred path. + # + # The alternative is to assume the key is not present by first trying + # to INSERT and then handling the IntegrityError that occurs from + # violating the UNIQUE constraint. This optimistic approach was + # rejected based on the common cache usage pattern. + # + # INSERT OR REPLACE aka UPSERT is not used because the old filename may + # need cleanup. + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_filename),) = rows + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + def __setitem__(self, key, value): + """Set corresponding `value` for `key` in cache. 
+ + :param key: key for item + :param value: value for item + :return: corresponding value + :raises KeyError: if key is not found + + """ + self.set(key, value, retry=True) + + def _row_update(self, rowid, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql( + 'UPDATE Cache SET' + ' store_time = ?,' + ' expire_time = ?,' + ' access_time = ?,' + ' access_count = ?,' + ' tag = ?,' + ' size = ?,' + ' mode = ?,' + ' filename = ?,' + ' value = ?' + ' WHERE rowid = ?', + ( + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + rowid, + ), + ) + + def _row_insert(self, key, raw, now, columns): + sql = self._sql + expire_time, tag, size, mode, filename, value = columns + sql( + 'INSERT INTO Cache(' + ' key, raw, store_time, expire_time, access_time,' + ' access_count, tag, size, mode, filename, value' + ') VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + ( + key, + raw, + now, # store_time + expire_time, + now, # access_time + 0, # access_count + tag, + size, + mode, + filename, + value, + ), + ) + + def _cull(self, now, sql, cleanup, limit=None): + cull_limit = self.cull_limit if limit is None else limit + + if cull_limit == 0: + return + + # Evict expired keys. + + select_expired_template = ( + 'SELECT %s FROM Cache' + ' WHERE expire_time IS NOT NULL AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + + select_expired = select_expired_template % 'filename' + rows = sql(select_expired, (now, cull_limit)).fetchall() + + if rows: + delete_expired = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_expired_template % 'rowid' + ) + sql(delete_expired, (now, cull_limit)) + + for (filename,) in rows: + cleanup(filename) + + cull_limit -= len(rows) + + if cull_limit == 0: + return + + # Evict keys by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None or self.volume() < self.size_limit: + return + + select_filename = select_policy.format(fields='filename', now=now) + rows = sql(select_filename, (cull_limit,)).fetchall() + + if rows: + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' % ( + select_policy.format(fields='rowid', now=now) + ) + sql(delete, (cull_limit,)) + + for (filename,) in rows: + cleanup(filename) + + def touch(self, key, expire=None, retry=False): + """Touch `key` in cache and update `expire` time. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param float expire: seconds until item expires + (default None, no expiry) + :param bool retry: retry if database timeout occurs (default False) + :return: True if key was touched + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + + with self._transact(retry) as (sql, _): + rows = sql( + 'SELECT rowid, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_expire_time),) = rows + + if old_expire_time is None or old_expire_time > now: + sql( + 'UPDATE Cache SET expire_time = ? WHERE rowid = ?', + (expire_time, rowid), + ) + return True + + return False + + def add(self, key, value, expire=None, read=False, tag=None, retry=False): + """Add `key` and `value` item to cache. + + Similar to `set`, but only add to cache if key not present. + + Operation is atomic. 
Only one concurrent add operation for a given key + will succeed. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param value: value for item + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was added + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read, key=key) + columns = (expire_time, tag, size, mode, filename, db_value) + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename, expire_time FROM Cache' + ' WHERE key = ? AND raw = ?', + (db_key, raw), + ).fetchall() + + if rows: + ((rowid, old_filename, old_expire_time),) = rows + + if old_expire_time is None or old_expire_time > now: + cleanup(filename) + return False + + cleanup(old_filename) + self._row_update(rowid, now, columns) + else: + self._row_insert(db_key, raw, now, columns) + + self._cull(now, sql, cleanup) + + return True + + def incr(self, key, delta=1, default=0, retry=False): + """Increment value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent increment operations will be + counted individually. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to increment (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + now = time.time() + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, filename, value FROM Cache' + ' WHERE key = ? AND raw = ?' + ) + + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (db_key, raw)).fetchall() + + if not rows: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store( + value, False, key=key + ) + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + return value + + ((rowid, expire_time, filename, value),) = rows + + if expire_time is not None and expire_time < now: + if default is None: + raise KeyError(key) + + value = default + delta + columns = (None, None) + self._disk.store( + value, False, key=key + ) + self._row_update(rowid, now, columns) + self._cull(now, sql, cleanup) + cleanup(filename) + return value + + value += delta + + columns = 'store_time = ?, value = ?' + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + + if update_column is not None: + columns += ', ' + update_column.format(now=now) + + update = 'UPDATE Cache SET %s WHERE rowid = ?' 
% columns + sql(update, (now, value, rowid)) + + return value + + def decr(self, key, delta=1, default=0, retry=False): + """Decrement value by delta for item with key. + + If key is missing and default is None then raise KeyError. Else if key + is missing and default is not None then use default for value. + + Operation is atomic. All concurrent decrement operations will be + counted individually. + + Unlike Memcached, negative values are supported. Value may be + decremented below zero. + + Assumes value may be stored in a SQLite column. Most builds that target + machines with 64-bit pointer widths will support 64-bit signed + integers. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param int delta: amount to decrement (default 1) + :param int default: value if key is missing (default 0) + :param bool retry: retry if database timeout occurs (default False) + :return: new value for item + :raises KeyError: if key is not found and default is None + :raises Timeout: if database timeout occurs + + """ + return self.incr(key, -delta, default, retry) + + def get( + self, + key, + default=None, + read=False, + expire_time=False, + tag=False, + retry=False, + ): + """Retrieve value from cache. If `key` is missing, return `default`. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool read: if True, return file handle to value + (default False) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + update_column = EVICTION_POLICY[self.eviction_policy]['get'] + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = (default, None, None) + elif expire_time or tag: + default = (default, None) + + if not self.statistics and update_column is None: + # Fast path, no transaction necessary. + + rows = self._sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError: + # Key was deleted before we could retrieve result. + return default + + else: # Slow path, transaction required. + cache_hit = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "hits"' + ) + cache_miss = ( + 'UPDATE Settings SET value = value + 1 WHERE key = "misses"' + ) + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + if self.statistics: + sql(cache_miss) + return default + + ( + (rowid, db_expire_time, db_tag, mode, filename, db_value), + ) = rows # noqa: E127 + + try: + value = self._disk.fetch(mode, filename, db_value, read) + except IOError: + # Key was deleted before we could retrieve result. + if self.statistics: + sql(cache_miss) + return default + + if self.statistics: + sql(cache_hit) + + now = time.time() + update = 'UPDATE Cache SET %s WHERE rowid = ?' 
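+                # Apply the eviction policy's read-time column update (for
+                # example access_time or access_count) inside the transaction.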
+ + if update_column is not None: + sql(update % update_column.format(now=now), (rowid,)) + + if expire_time and tag: + return (value, db_expire_time, db_tag) + elif expire_time: + return (value, db_expire_time) + elif tag: + return (value, db_tag) + else: + return value + + def __getitem__(self, key): + """Return corresponding value for `key` from cache. + + :param key: key matching item + :return: corresponding value + :raises KeyError: if key is not found + + """ + value = self.get(key, default=ENOVAL, retry=True) + if value is ENOVAL: + raise KeyError(key) + return value + + def read(self, key, retry=False): + """Return file handle value corresponding to `key` from cache. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: file open for reading in binary mode + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + handle = self.get(key, default=ENOVAL, read=True, retry=retry) + if handle is ENOVAL: + raise KeyError(key) + return handle + + def __contains__(self, key): + """Return `True` if `key` matching item is found in cache. + + :param key: key matching item + :return: True if key matching item + + """ + sql = self._sql + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + rows = sql(select, (db_key, raw, time.time())).fetchall() + + return bool(rows) + + def pop( + self, key, default=None, expire_time=False, tag=False, retry=False + ): # noqa: E501 + """Remove corresponding item for `key` from cache and return value. + + If `key` is missing, return `default`. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key for item + :param default: value to return if key is missing (default None) + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: value for item or default if key not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + select = ( + 'SELECT rowid, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)' + ) + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + with self._transact(retry) as (sql, _): + rows = sql(select, (db_key, raw, time.time())).fetchall() + + if not rows: + return default + + ((rowid, db_expire_time, db_tag, mode, filename, db_value),) = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + try: + value = self._disk.fetch(mode, filename, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + return default + finally: + if filename is not None: + self._disk.remove(filename) + + if expire_time and tag: + return value, db_expire_time, db_tag + elif expire_time: + return value, db_expire_time + elif tag: + return value, db_tag + else: + return value + + def __delitem__(self, key, retry=True): + """Delete corresponding item for `key` from cache. 
+ + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default `True`). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default True) + :raises KeyError: if key is not found + :raises Timeout: if database timeout occurs + + """ + db_key, raw = self._disk.put(key) + + with self._transact(retry) as (sql, cleanup): + rows = sql( + 'SELECT rowid, filename FROM Cache' + ' WHERE key = ? AND raw = ?' + ' AND (expire_time IS NULL OR expire_time > ?)', + (db_key, raw, time.time()), + ).fetchall() + + if not rows: + raise KeyError(key) + + ((rowid, filename),) = rows + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(filename) + + return True + + def delete(self, key, retry=False): + """Delete corresponding item for `key` from cache. + + Missing keys are ignored. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param key: key matching item + :param bool retry: retry if database timeout occurs (default False) + :return: True if item was deleted + :raises Timeout: if database timeout occurs + + """ + # pylint: disable=unnecessary-dunder-call + try: + return self.__delitem__(key, retry=retry) + except KeyError: + return False + + def push( + self, + value, + prefix=None, + side='back', + expire=None, + read=False, + tag=None, + retry=False, + ): + """Push `value` onto `side` of queue identified by `prefix` in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + Defaults to pushing value on back of queue. Set side to 'front' to push + value on front of queue. Side must be one of 'back' or 'front'. + + Operation is atomic. Concurrent operations will be serialized. + + When `read` is `True`, `value` should be a file-like object opened + for reading in binary mode. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull`. + + >>> cache = Cache() + >>> print(cache.push('first value')) + 500000000000000 + >>> cache.get(500000000000000) + 'first value' + >>> print(cache.push('second value')) + 500000000000001 + >>> print(cache.push('third value', side='front')) + 499999999999999 + >>> cache.push(1234, prefix='userids') + 'userids-500000000000000' + + :param value: value for item + :param str prefix: key prefix (default None, key is integer) + :param str side: either 'back' or 'front' (default 'back') + :param float expire: seconds until the key expires + (default None, no expiry) + :param bool read: read value as bytes from file (default False) + :param str tag: text to associate with key (default None) + :param bool retry: retry if database timeout occurs (default False) + :return: key for item in cache + :raises Timeout: if database timeout occurs + + """ + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + now = time.time() + raw = True + expire_time = None if expire is None else now + expire + size, mode, filename, db_value = self._disk.store(value, read) + columns = (expire_time, tag, size, mode, filename, db_value) + order = {'back': 'DESC', 'front': 'ASC'} + select = ( + 'SELECT key FROM Cache' + ' WHERE ? < key AND key < ? AND raw = ?' 
+ ' ORDER BY key %s LIMIT 1' + ) % order[side] + + with self._transact(retry, filename) as (sql, cleanup): + rows = sql(select, (min_key, max_key, raw)).fetchall() + + if rows: + ((key,),) = rows + + if prefix is not None: + num = int(key[(key.rfind('-') + 1) :]) + else: + num = key + + if side == 'back': + num += 1 + else: + assert side == 'front' + num -= 1 + else: + num = 500000000000000 + + if prefix is not None: + db_key = '{0}-{1:015d}'.format(prefix, num) + else: + db_key = num + + self._row_insert(db_key, raw, now, columns) + self._cull(now, sql, cleanup) + + return db_key + + def pull( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): + """Pull key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to pulling key and value item pairs from front of queue. Set + side to 'back' to pull from back of queue. Side must be one of 'front' + or 'back'. + + Operation is atomic. Concurrent operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.push` and `Cache.get`. + + >>> cache = Cache() + >>> cache.pull() + (None, None) + >>> for letter in 'abc': + ... print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.pull() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> _, value = cache.pull(side='back') + >>> value + 'c' + >>> cache.push(1234, 'userids') + 'userids-500000000000000' + >>> _, value = cache.pull('userids') + >>> value + 1234 + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.peek + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows + + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + if db_expire is not None and db_expire < time.time(): + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. 
+ continue + finally: + if name is not None: + self._disk.remove(name) + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def peek( + self, + prefix=None, + default=(None, None), + side='front', + expire_time=False, + tag=False, + retry=False, + ): + """Peek at key and value item pair from `side` of queue in cache. + + When prefix is None, integer keys are used. Otherwise, string keys are + used in the format "prefix-integer". Integer starts at 500 trillion. + + If queue is empty, return default. + + Defaults to peeking at key and value item pairs from front of queue. + Set side to 'back' to pull from back of queue. Side must be one of + 'front' or 'back'. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + See also `Cache.pull` and `Cache.push`. + + >>> cache = Cache() + >>> for letter in 'abc': + ... print(cache.push(letter)) + 500000000000000 + 500000000000001 + 500000000000002 + >>> key, value = cache.peek() + >>> print(key) + 500000000000000 + >>> value + 'a' + >>> key, value = cache.peek(side='back') + >>> print(key) + 500000000000002 + >>> value + 'c' + + :param str prefix: key prefix (default None, key is integer) + :param default: value to return if key is missing + (default (None, None)) + :param str side: either 'front' or 'back' (default 'front') + :param bool expire_time: if True, return expire_time in tuple + (default False) + :param bool tag: if True, return tag in tuple (default False) + :param bool retry: retry if database timeout occurs (default False) + :return: key and value item pair or default if queue is empty + :raises Timeout: if database timeout occurs + + """ + # Caution: Nearly identical code exists in Cache.pull + if prefix is None: + min_key = 0 + max_key = 999999999999999 + else: + min_key = prefix + '-000000000000000' + max_key = prefix + '-999999999999999' + + order = {'front': 'ASC', 'back': 'DESC'} + select = ( + 'SELECT rowid, key, expire_time, tag, mode, filename, value' + ' FROM Cache WHERE ? < key AND key < ? AND raw = 1' + ' ORDER BY key %s LIMIT 1' + ) % order[side] + + if expire_time and tag: + default = default, None, None + elif expire_time or tag: + default = default, None + + while True: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, (min_key, max_key)).fetchall() + + if not rows: + return default + + ( + (rowid, key, db_expire, db_tag, mode, name, db_value), + ) = rows + + if db_expire is not None and db_expire < time.time(): + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + cleanup(name) + else: + break + + try: + value = self._disk.fetch(mode, name, db_value, False) + except IOError: + # Key was deleted before we could retrieve result. + continue + break + + if expire_time and tag: + return (key, value), db_expire, db_tag + elif expire_time: + return (key, value), db_expire + elif tag: + return (key, value), db_tag + else: + return key, value + + def peekitem(self, last=True, expire_time=False, tag=False, retry=False): + """Peek at key and value item pair in cache based on iteration order. + + Expired items are deleted from cache. Operation is atomic. Concurrent + operations will be serialized. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
+
+        >>> cache = Cache()
+        >>> for num, letter in enumerate('abc'):
+        ...     cache[letter] = num
+        >>> cache.peekitem()
+        ('c', 2)
+        >>> cache.peekitem(last=False)
+        ('a', 0)
+
+        :param bool last: last item in iteration order (default True)
+        :param bool expire_time: if True, return expire_time in tuple
+            (default False)
+        :param bool tag: if True, return tag in tuple (default False)
+        :param bool retry: retry if database timeout occurs (default False)
+        :return: key and value item pair
+        :raises KeyError: if cache is empty
+        :raises Timeout: if database timeout occurs
+
+        """
+        order = ('ASC', 'DESC')
+        select = (
+            'SELECT rowid, key, raw, expire_time, tag, mode, filename, value'
+            ' FROM Cache ORDER BY rowid %s LIMIT 1'
+        ) % order[last]
+
+        while True:
+            while True:
+                with self._transact(retry) as (sql, cleanup):
+                    rows = sql(select).fetchall()
+
+                    if not rows:
+                        raise KeyError('dictionary is empty')
+
+                    (
+                        (
+                            rowid,
+                            db_key,
+                            raw,
+                            db_expire,
+                            db_tag,
+                            mode,
+                            name,
+                            db_value,
+                        ),
+                    ) = rows
+
+                    if db_expire is not None and db_expire < time.time():
+                        sql('DELETE FROM Cache WHERE rowid = ?', (rowid,))
+                        cleanup(name)
+                    else:
+                        break
+
+            key = self._disk.get(db_key, raw)
+
+            try:
+                value = self._disk.fetch(mode, name, db_value, False)
+            except IOError:
+                # Key was deleted before we could retrieve result.
+                continue
+            break
+
+        if expire_time and tag:
+            return (key, value), db_expire, db_tag
+        elif expire_time:
+            return (key, value), db_expire
+        elif tag:
+            return (key, value), db_tag
+        else:
+            return key, value
+
+    def memoize(
+        self, name=None, typed=False, expire=None, tag=None, ignore=()
+    ):
+        """Memoizing cache decorator.
+
+        Decorator to wrap callable with memoizing function using cache.
+        Repeated calls with the same arguments will look up the result in
+        cache and avoid function evaluation.
+
+        If name is set to None (default), the callable name will be determined
+        automatically.
+
+        When expire is set to zero, function results will not be set in the
+        cache. Cache lookups still occur, however. Read
+        :doc:`case-study-landing-page-caching` for example usage.
+
+        If typed is set to True, function arguments of different types will be
+        cached separately. For example, f(3) and f(3.0) will be treated as
+        distinct calls with distinct results.
+
+        The original underlying function is accessible through the __wrapped__
+        attribute. This is useful for introspection, for bypassing the cache,
+        or for rewrapping the function with a different cache.
+
+        >>> from diskcache import Cache
+        >>> cache = Cache()
+        >>> @cache.memoize(expire=1, tag='fib')
+        ... def fibonacci(number):
+        ...     if number == 0:
+        ...         return 0
+        ...     elif number == 1:
+        ...         return 1
+        ...     else:
+        ...         return fibonacci(number - 1) + fibonacci(number - 2)
+        >>> print(fibonacci(100))
+        354224848179261915075
+
+        An additional `__cache_key__` attribute can be used to generate the
+        cache key used for the given arguments.
+
+        >>> key = fibonacci.__cache_key__(100)
+        >>> print(cache[key])
+        354224848179261915075
+
+        Remember to call memoize when decorating a callable. If you forget,
+        then a TypeError will occur. Note the lack of parentheses after
+        memoize below:
+
+        >>> @cache.memoize
+        ... def test():
+        ...     pass
+        Traceback (most recent call last):
+            ...
+ TypeError: name cannot be callable + + :param cache: cache to store callable arguments and return values + :param str name: name given for callable (default None, automatic) + :param bool typed: cache different types separately (default False) + :param float expire: seconds until arguments expire + (default None, no expiry) + :param str tag: text to associate with arguments (default None) + :param set ignore: positional or keyword args to ignore (default ()) + :return: callable decorator + + """ + # Caution: Nearly identical code exists in DjangoCache.memoize + if callable(name): + raise TypeError('name cannot be callable') + + def decorator(func): + """Decorator created by memoize() for callable `func`.""" + base = (full_name(func),) if name is None else (name,) + + @ft.wraps(func) + def wrapper(*args, **kwargs): + """Wrapper for callable to cache arguments and return values.""" + key = wrapper.__cache_key__(*args, **kwargs) + result = self.get(key, default=ENOVAL, retry=True) + + if result is ENOVAL: + result = func(*args, **kwargs) + if expire is None or expire > 0: + self.set(key, result, expire, tag=tag, retry=True) + + return result + + def __cache_key__(*args, **kwargs): + """Make key for cache given function arguments.""" + return args_to_key(base, args, kwargs, typed, ignore) + + wrapper.__cache_key__ = __cache_key__ + return wrapper + + return decorator + + def check(self, fix=False, retry=False): + """Check database and file system consistency. + + Intended for use in testing and post-mortem error analysis. + + While checking the Cache table for consistency, a writer lock is held + on the database. The lock blocks other cache clients from writing to + the database. For caches with many file references, the lock may be + held for a long time. For example, local benchmarking shows that a + cache with 1,000 file references takes ~60ms to check. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool fix: correct inconsistencies + :param bool retry: retry if database timeout occurs (default False) + :return: list of warnings + :raises Timeout: if database timeout occurs + + """ + # pylint: disable=access-member-before-definition,W0201 + with warnings.catch_warnings(record=True) as warns: + sql = self._sql + + # Check integrity of database. + + rows = sql('PRAGMA integrity_check').fetchall() + + if len(rows) != 1 or rows[0][0] != 'ok': + for (message,) in rows: + warnings.warn(message) + + if fix: + sql('VACUUM') + + with self._transact(retry) as (sql, _): + + # Check Cache.filename against file system. + + filenames = set() + select = ( + 'SELECT rowid, size, filename FROM Cache' + ' WHERE filename IS NOT NULL' + ) + + rows = sql(select).fetchall() + + for rowid, size, filename in rows: + full_path = op.join(self._directory, filename) + filenames.add(full_path) + + if op.exists(full_path): + real_size = op.getsize(full_path) + + if size != real_size: + message = 'wrong file size: %s, %d != %d' + args = full_path, real_size, size + warnings.warn(message % args) + + if fix: + sql( + 'UPDATE Cache SET size = ?' + ' WHERE rowid = ?', + (real_size, rowid), + ) + + continue + + warnings.warn('file not found: %s' % full_path) + + if fix: + sql('DELETE FROM Cache WHERE rowid = ?', (rowid,)) + + # Check file system against Cache.filename. 
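+                # Files on disk that no Cache row references are reported as
+                # unknown and removed when fix is True; the SQLite database
+                # files themselves are skipped.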
+ + for dirpath, _, files in os.walk(self._directory): + paths = [op.join(dirpath, filename) for filename in files] + error = set(paths) - filenames + + for full_path in error: + if DBNAME in full_path: + continue + + message = 'unknown file: %s' % full_path + warnings.warn(message, UnknownFileWarning) + + if fix: + os.remove(full_path) + + # Check for empty directories. + + for dirpath, dirs, files in os.walk(self._directory): + if not (dirs or files): + message = 'empty directory: %s' % dirpath + warnings.warn(message, EmptyDirWarning) + + if fix: + os.rmdir(dirpath) + + # Check Settings.count against count of Cache rows. + + self.reset('count') + ((count,),) = sql('SELECT COUNT(key) FROM Cache').fetchall() + + if self.count != count: + message = 'Settings.count != COUNT(Cache.key); %d != %d' + warnings.warn(message % (self.count, count)) + + if fix: + sql( + 'UPDATE Settings SET value = ? WHERE key = ?', + (count, 'count'), + ) + + # Check Settings.size against sum of Cache.size column. + + self.reset('size') + select_size = 'SELECT COALESCE(SUM(size), 0) FROM Cache' + ((size,),) = sql(select_size).fetchall() + + if self.size != size: + message = 'Settings.size != SUM(Cache.size); %d != %d' + warnings.warn(message % (self.size, size)) + + if fix: + sql( + 'UPDATE Settings SET value = ? WHERE key =?', + (size, 'size'), + ) + + return warns + + def create_tag_index(self): + """Create tag index on cache database. + + It is better to initialize cache with `tag_index=True` than use this. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('CREATE INDEX IF NOT EXISTS Cache_tag_rowid ON Cache(tag, rowid)') + self.reset('tag_index', 1) + + def drop_tag_index(self): + """Drop tag index on cache database. + + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql('DROP INDEX IF EXISTS Cache_tag_rowid') + self.reset('tag_index', 0) + + def evict(self, tag, retry=False): + """Remove items with matching `tag` from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param str tag: tag identifying items + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE tag = ? AND rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [tag, 0, 100] + return self._select_delete(select, args, arg_index=1, retry=retry) + + def expire(self, now=None, retry=False): + """Remove expired items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). 
+ + :param float now: current time (default None, ``time.time()`` used) + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, expire_time, filename FROM Cache' + ' WHERE ? < expire_time AND expire_time < ?' + ' ORDER BY expire_time LIMIT ?' + ) + args = [0, now or time.time(), 100] + return self._select_delete(select, args, row_index=1, retry=retry) + + def cull(self, retry=False): + """Cull items from cache until volume is less than size limit. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of items removed + :raises Timeout: if database timeout occurs + + """ + now = time.time() + + # Remove expired items. + + count = self.expire(now) + + # Remove items by policy. + + select_policy = EVICTION_POLICY[self.eviction_policy]['cull'] + + if select_policy is None: + return 0 + + select_filename = select_policy.format(fields='filename', now=now) + + try: + while self.volume() > self.size_limit: + with self._transact(retry) as (sql, cleanup): + rows = sql(select_filename, (10,)).fetchall() + + if not rows: + break + + count += len(rows) + delete = ( + 'DELETE FROM Cache WHERE rowid IN (%s)' + % select_policy.format(fields='rowid', now=now) + ) + sql(delete, (10,)) + + for (filename,) in rows: + cleanup(filename) + except Timeout: + raise Timeout(count) from None + + return count + + def clear(self, retry=False): + """Remove all items from cache. + + Removing items is an iterative process. In each iteration, a subset of + items is removed. Concurrent writes may occur between iterations. + + If a :exc:`Timeout` occurs, the first element of the exception's + `args` attribute will be the number of items removed before the + exception occurred. + + Raises :exc:`Timeout` error when database timeout occurs and `retry` is + `False` (default). + + :param bool retry: retry if database timeout occurs (default False) + :return: count of rows removed + :raises Timeout: if database timeout occurs + + """ + select = ( + 'SELECT rowid, filename FROM Cache' + ' WHERE rowid > ?' + ' ORDER BY rowid LIMIT ?' + ) + args = [0, 100] + return self._select_delete(select, args, retry=retry) + + def _select_delete( + self, select, args, row_index=0, arg_index=0, retry=False + ): + count = 0 + delete = 'DELETE FROM Cache WHERE rowid IN (%s)' + + try: + while True: + with self._transact(retry) as (sql, cleanup): + rows = sql(select, args).fetchall() + + if not rows: + break + + count += len(rows) + sql(delete % ','.join(str(row[0]) for row in rows)) + + for row in rows: + args[arg_index] = row[row_index] + cleanup(row[-1]) + + except Timeout: + raise Timeout(count) from None + + return count + + def iterkeys(self, reverse=False): + """Iterate Cache keys in database sort order. + + >>> cache = Cache() + >>> for key in [4, 1, 3, 0, 2]: + ... 
cache[key] = key + >>> list(cache.iterkeys()) + [0, 1, 2, 3, 4] + >>> list(cache.iterkeys(reverse=True)) + [4, 3, 2, 1, 0] + + :param bool reverse: reverse sort order (default False) + :return: iterator of Cache keys + + """ + sql = self._sql + limit = 100 + _disk_get = self._disk.get + + if reverse: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key DESC, raw DESC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw < ? OR key < ?' + ' ORDER BY key DESC, raw DESC LIMIT ?' + ) + else: + select = ( + 'SELECT key, raw FROM Cache' + ' ORDER BY key ASC, raw ASC LIMIT 1' + ) + iterate = ( + 'SELECT key, raw FROM Cache' + ' WHERE key = ? AND raw > ? OR key > ?' + ' ORDER BY key ASC, raw ASC LIMIT ?' + ) + + row = sql(select).fetchall() + + if row: + ((key, raw),) = row + else: + return + + yield _disk_get(key, raw) + + while True: + rows = sql(iterate, (key, raw, key, limit)).fetchall() + + if not rows: + break + + for key, raw in rows: + yield _disk_get(key, raw) + + def _iter(self, ascending=True): + sql = self._sql + rows = sql('SELECT MAX(rowid) FROM Cache').fetchall() + ((max_rowid,),) = rows + yield # Signal ready. + + if max_rowid is None: + return + + bound = max_rowid + 1 + limit = 100 + _disk_get = self._disk.get + rowid = 0 if ascending else bound + select = ( + 'SELECT rowid, key, raw FROM Cache' + ' WHERE ? < rowid AND rowid < ?' + ' ORDER BY rowid %s LIMIT ?' + ) % ('ASC' if ascending else 'DESC') + + while True: + if ascending: + args = (rowid, bound, limit) + else: + args = (0, rowid, limit) + + rows = sql(select, args).fetchall() + + if not rows: + break + + for rowid, key, raw in rows: + yield _disk_get(key, raw) + + def __iter__(self): + """Iterate keys in cache including expired items.""" + iterator = self._iter() + next(iterator) + return iterator + + def __reversed__(self): + """Reverse iterate keys in cache including expired items.""" + iterator = self._iter(ascending=False) + next(iterator) + return iterator + + def stats(self, enable=True, reset=False): + """Return cache statistics hits and misses. + + :param bool enable: enable collecting statistics (default True) + :param bool reset: reset hits and misses to 0 (default False) + :return: (hits, misses) + + """ + # pylint: disable=E0203,W0201 + result = (self.reset('hits'), self.reset('misses')) + + if reset: + self.reset('hits', 0) + self.reset('misses', 0) + + self.reset('statistics', enable) + + return result + + def volume(self): + """Return estimated total size of cache on disk. + + :return: size in bytes + + """ + ((page_count,),) = self._sql('PRAGMA page_count').fetchall() + total_size = self._page_size * page_count + self.reset('size') + return total_size + + def close(self): + """Close database connection.""" + con = getattr(self._local, 'con', None) + + if con is None: + return + + con.close() + + try: + delattr(self._local, 'con') + except AttributeError: + pass + + def __enter__(self): + # Create connection in thread. + # pylint: disable=unused-variable + connection = self._con # noqa + return self + + def __exit__(self, *exception): + self.close() + + def __len__(self): + """Count of items in cache including expired items.""" + return self.reset('count') + + def __getstate__(self): + return (self.directory, self.timeout, type(self.disk)) + + def __setstate__(self, state): + self.__init__(*state) + + def reset(self, key, value=ENOVAL, update=True): + """Reset `key` and `value` item from Settings table. + + Use `reset` to update the value of Cache settings correctly. 
Cache + settings are stored in the Settings table of the SQLite database. If + `update` is ``False`` then no attempt is made to update the database. + + If `value` is not given, it is reloaded from the Settings + table. Otherwise, the Settings table is updated. + + Settings with the ``disk_`` prefix correspond to Disk + attributes. Updating the value will change the unprefixed attribute on + the associated Disk instance. + + Settings with the ``sqlite_`` prefix correspond to SQLite + pragmas. Updating the value will execute the corresponding PRAGMA + statement. + + SQLite PRAGMA statements may be executed before the Settings table + exists in the database by setting `update` to ``False``. + + :param str key: Settings key for item + :param value: value for item (optional) + :param bool update: update database Settings table (default True) + :return: updated value for item + :raises Timeout: if database timeout occurs + + """ + sql = self._sql + sql_retry = self._sql_retry + + if value is ENOVAL: + select = 'SELECT value FROM Settings WHERE key = ?' + ((value,),) = sql_retry(select, (key,)).fetchall() + setattr(self, key, value) + return value + + if update: + statement = 'UPDATE Settings SET value = ? WHERE key = ?' + sql_retry(statement, (value, key)) + + if key.startswith('sqlite_'): + pragma = key[7:] + + # 2016-02-17 GrantJ - PRAGMA and isolation_level=None + # don't always play nicely together. Retry setting the + # PRAGMA. I think some PRAGMA statements expect to + # immediately take an EXCLUSIVE lock on the database. I + # can't find any documentation for this but without the + # retry, stress will intermittently fail with multiple + # processes. + + # 2018-11-05 GrantJ - Avoid setting pragma values that + # are already set. Pragma settings like auto_vacuum and + # journal_mode can take a long time or may not work after + # tables have been created. + + start = time.time() + while True: + try: + try: + ((old_value,),) = sql( + 'PRAGMA %s' % (pragma) + ).fetchall() + update = old_value != value + except ValueError: + update = True + if update: + sql('PRAGMA %s = %s' % (pragma, value)).fetchall() + break + except sqlite3.OperationalError as exc: + if str(exc) != 'database is locked': + raise + diff = time.time() - start + if diff > 60: + raise + time.sleep(0.001) + elif key.startswith('disk_'): + attr = key[5:] + setattr(self._disk, attr, value) + + setattr(self, key, value) + return value + +if hasattr(os, 'register_at_fork'): + _lock = threading.RLock() + + def _acquireLock(): + global _lock + if _lock: + try: + _lock.acquire() + except BaseException: + _lock.release() + raise + + def _releaseLock(): + global _lock + if _lock: + _lock.release() + + def _after_at_fork_child_reinit_locks(): + global _lock + _lock = threading.RLock() + + os.register_at_fork(before=_acquireLock, + after_in_child=_after_at_fork_child_reinit_locks, + after_in_parent=_releaseLock) +else: + def _acquireLock(): + pass + + def _releaseLock(): + pass From a98b3293d0d7f94833f977e8d2b7b63c25010e98 Mon Sep 17 00:00:00 2001 From: Ariel Eizenberg Date: Mon, 25 Dec 2023 09:19:46 +0200 Subject: [PATCH 3/5] cleanup test --- tests/test_fork_multithreading.py | 145 +++++++++++++++--------------- 1 file changed, 71 insertions(+), 74 deletions(-) diff --git a/tests/test_fork_multithreading.py b/tests/test_fork_multithreading.py index d103462..956715c 100644 --- a/tests/test_fork_multithreading.py +++ b/tests/test_fork_multithreading.py @@ -1,74 +1,71 @@ -""" -Test diskcache.core.Cache behaviour when process is forking. 
-Make sure it does not deadlock on the sqlite3 transaction lock if -forked while the lock is in use. -""" - -import errno -import hashlib -import io -import os -import os.path as op -import sys -import pathlib -import pickle -import shutil -import sqlite3 -import subprocess as sp -import tempfile -import threading -import time -import warnings -from threading import Thread -from unittest import mock - -if sys.platform != "win32": - import signal - -import pytest - -import diskcache as dc - -REPEATS = 1000 - -@pytest.fixture -def cache(): - with dc.Cache() as cache: - yield cache - shutil.rmtree(cache.directory, ignore_errors=True) - -def _test_thread_imp(cache): - for i in range(REPEATS * 10): - cache.set(i, i) - -def _test_wait_pid(pid): - _, status = os.waitpid(pid, 0) - assert status == 0, "Child died unexpectedly" - -@pytest.mark.skipif(sys.platform == "win32", reason="skips this test on Windows") -def test_fork_multithreading(cache): - thread = Thread(target=_test_thread_imp, args=(cache,)) - thread.start() - try: - for i in range(REPEATS): - pid = os.fork() - if pid == 0: - cache.set(i, 0) - os._exit(0) - else: - thread = Thread(target=_test_wait_pid, args=(pid,)) - thread.start() - thread.join(timeout=10) - if thread.is_alive(): - os.kill(pid, signal.SIGKILL) - thread.join() - assert False, "Deadlock detected." - except OSError as e: - if e.errno != errno.EINTR: - raise - - thread.join() - -with dc.Cache() as cache: - test_fork_multithreading(cache) -shutil.rmtree(cache.directory, ignore_errors=True) +""" +Test diskcache.core.Cache behaviour when process is forking. +Make sure it does not deadlock on the sqlite3 transaction lock if +forked while the lock is in use. +""" + +import errno +import hashlib +import io +import os +import os.path as op +import sys +import pathlib +import pickle +import shutil +import sqlite3 +import subprocess as sp +import tempfile +import threading +import time +import warnings +from threading import Thread +from unittest import mock + +if sys.platform != "win32": + import signal + +import pytest + +import diskcache as dc + +REPEATS = 1000 + +@pytest.fixture +def cache(): + with dc.Cache() as cache: + yield cache + shutil.rmtree(cache.directory, ignore_errors=True) + +def _test_thread_imp(cache): + for i in range(REPEATS * 10): + cache.set(i, i) + +def _test_wait_pid(pid): + _, status = os.waitpid(pid, 0) + assert status == 0, "Child died unexpectedly" + +@pytest.mark.skipif(sys.platform == "win32", reason="skips this test on Windows") +def test_fork_multithreading(cache): + thread = Thread(target=_test_thread_imp, args=(cache,)) + thread.start() + try: + for i in range(REPEATS): + pid = os.fork() + if pid == 0: + cache.set(i, 0) + os._exit(0) + else: + thread = Thread(target=_test_wait_pid, args=(pid,)) + thread.start() + thread.join(timeout=10) + if thread.is_alive(): + os.kill(pid, signal.SIGKILL) + thread.join() + assert False, "Deadlock detected." 
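+    # Heavy forking can interrupt blocking calls with EINTR; treat that as
+    # benign and re-raise anything else.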
+ except OSError as e: + if e.errno != errno.EINTR: + raise + + thread.join() + From 89b508ef91b5f71d73c226d57e814e6989a46c65 Mon Sep 17 00:00:00 2001 From: Ariel Eizenberg Date: Thu, 18 Jan 2024 11:35:36 +0200 Subject: [PATCH 4/5] Update tests/test_fork_multithreading.py Co-authored-by: Grant Jenks --- tests/test_fork_multithreading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fork_multithreading.py b/tests/test_fork_multithreading.py index 956715c..00a47ca 100644 --- a/tests/test_fork_multithreading.py +++ b/tests/test_fork_multithreading.py @@ -45,7 +45,7 @@ def _test_wait_pid(pid): _, status = os.waitpid(pid, 0) assert status == 0, "Child died unexpectedly" -@pytest.mark.skipif(sys.platform == "win32", reason="skips this test on Windows") +@pytest.mark.skipif(sys.platform == "win32", reason="no fork on Windows") def test_fork_multithreading(cache): thread = Thread(target=_test_thread_imp, args=(cache,)) thread.start() From 017eb0dddd70bb5ece467392330282f7ff495445 Mon Sep 17 00:00:00 2001 From: Ariel Eizenberg Date: Thu, 18 Jan 2024 11:36:44 +0200 Subject: [PATCH 5/5] Fix CR - remove _lock test --- diskcache/core.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/diskcache/core.py b/diskcache/core.py index 6633e93..ebf9a75 100644 --- a/diskcache/core.py +++ b/diskcache/core.py @@ -2460,17 +2460,15 @@ def reset(self, key, value=ENOVAL, update=True): def _acquireLock(): global _lock - if _lock: - try: - _lock.acquire() - except BaseException: - _lock.release() - raise + try: + _lock.acquire() + except BaseException: + _lock.release() + raise def _releaseLock(): global _lock - if _lock: - _lock.release() + _lock.release() def _after_at_fork_child_reinit_locks(): global _lock