Module freshlinks.cache

Cache URL response.

Expand source code
"""Cache URL response."""

# native
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import field
from datetime import datetime
from datetime import timedelta
from hashlib import sha256
from pathlib import Path
from time import time
from typing import Dict
from typing import Any
from typing import List
from typing import Optional
from typing import Union
import re

# lib
import ujson

# Alias for values that may be given either as text or as a `Path`.
StrOrPath = Union[str, Path]
"""`str` or `Path`."""

# Stored with a leading "~"; `LinkCache.__init__` expands it via `expanduser()`.
DEFAULT_PATH: StrOrPath = "~/.cache/freshlinks"
"""Default cache folder."""

RE_DURATION = re.compile(r"(\d+(?:\.\d+)?)[ ]*([dhms]?)")
"""Pattern for time strings; the unit letter is optional."""

DURATION_UNITS = {
    "d": "days",
    "h": "hours",
    "m": "minutes",
    "s": "seconds",
}
"""Time string units."""


def to_timedelta(duration: str) -> timedelta:
    """Convert a time duration string into a `timedelta`.

    The string may contain several `<number><unit>` parts (for example
    `"1h 30m"`); their values are added together. Units are `d`, `h`, `m`
    and `s` and are matched case-insensitively. A number without a unit is
    counted as seconds. A string without any number gives a zero
    `timedelta`.

    Args:
        duration (str): text to parse.

    Returns:
        timedelta: sum of all parts found in `duration`.
    """
    totals = {name: 0.0 for name in DURATION_UNITS.values()}
    for value, unit in RE_DURATION.findall(duration.lower()):
        # A bare number yields an empty unit, which is not a key of
        # DURATION_UNITS, so the lookup falls back to seconds.
        totals[DURATION_UNITS.get(unit, "seconds")] += float(value)
    return timedelta(**totals)


def age_time(age: str, start: Optional[datetime] = None) -> int:
    """Return the unix time that lies `age` before a reference moment.

    Args:
        age (str): amount of time, parsed by `to_timedelta` (numbers
            followed by a unit: s=seconds, m=minutes, h=hours, d=days).

        start (datetime, optional): moment from which `age` is subtracted.
            When `None`, `datetime.now()` is used. Defaults to `None`.

    Returns:
        int: unix timestamp of `start - age`, truncated to whole seconds.
    """
    reference = datetime.now() if start is None else start
    moment = reference - to_timedelta(age)
    return int(moment.timestamp())


def hash_url(url: str) -> str:
    """Return the hex-encoded SHA-256 digest of `url` (UTF-8 encoded)."""
    digest = sha256()
    digest.update(url.encode("utf-8"))
    return digest.hexdigest()


@dataclass
class LinkEntry:
    """Metadata recorded for a single URL."""

    is_cached: bool = False
    """Did this entry come from a cache?"""

    version: int = 1
    """Entry format version."""

    time: int = 0
    """Entry timestamp; `LinkCache` compares it against its `max_time`."""

    code: int = 200
    """Status code; 200, 301 and 302 count as `ok`."""

    err: str = ""
    """Error text; empty by default."""

    url: str = ""
    """URL this entry describes; `LinkCache` uses it as the store key."""

    links: List[str] = field(default_factory=list)
    """Links associated with the URL (presumably those found in its response)."""

    @staticmethod
    def load(data: Dict[str, Any]) -> LinkEntry:
        """Build an entry from its stored dictionary form.

        `data` must provide the keys `v`, `t`, `c`, `url` and `links`
        (a missing one raises `KeyError`); `err` is optional. The result
        is always flagged with `is_cached=True`.
        """
        kwargs = {
            "version": data["v"],
            "time": data["t"],
            "code": data["c"],
            "err": data.get("err", ""),
            "url": data["url"],
            "links": data["links"],
        }
        return LinkEntry(is_cached=True, **kwargs)

    @property
    def ok(self) -> bool:
        """Whether `code` is one of 200, 301 or 302."""
        return self.code in (200, 301, 302)

    @property
    def json(self) -> Dict[str, Any]:
        """Dictionary form of the entry, using the short keys `load` reads.

        `is_cached` is not part of the output.
        """
        return {
            "v": self.version,
            "t": self.time,
            "c": self.code,
            "err": self.err,
            "url": self.url,
            "links": self.links,
        }


class LinkCache:
    """A file-based cache for storing metadata about URLs.

    Entries are kept in the in-memory `store`, keyed by URL, and mirrored on
    disk as `<hash_url(url)>.json` files inside `path`.
    """

    path: Path
    """Cache folder."""

    max_time: int = 0
    """Cache entries older than this are considered invalid."""

    def __init__(
        self,
        store: Dict[str, LinkEntry],
        max_time: int = 0,
        path: StrOrPath = DEFAULT_PATH,
        preload: bool = False,
    ):
        """Set up the cache and make sure its folder exists.

        Args:
            store (Dict[str, LinkEntry]): mapping that holds loaded entries;
                it is used as-is, not copied.
            max_time (int): oldest acceptable entry timestamp. A value that
                is not positive is replaced by the current unix time.
            path (StrOrPath): cache folder; a leading `~` is expanded.
            preload (bool): when true, load every cache file immediately.
        """
        self.store = store
        self.max_time = max_time if max_time > 0 else int(time())
        self.path = Path(path).expanduser()
        # parents=True: intermediate folders (e.g. ~/.cache) may not exist yet.
        self.path.mkdir(parents=True, exist_ok=True)
        if preload:
            self.preload()

    def preload(self) -> None:
        """Load every `*.json` file found in the cache folder."""
        for item in self.path.glob("*.json"):
            self.load(item.stem)

    def load(self, hashed: str) -> Optional[LinkEntry]:
        """Read the cache file named after `hashed` into the store.

        Returns:
            Optional[LinkEntry]: the loaded entry, or `None` when the file
            is missing, cannot be decoded or parsed, or is older than
            `max_time`.
        """
        path = self.path / f"{hashed}.json"
        if not path.exists():
            return None

        try:
            # Files are written as UTF-8 by __setitem__; read them the same way
            # instead of relying on the locale's default encoding.
            data = ujson.loads(path.read_text(encoding="utf-8"))
        except ValueError:
            # Undecodable text or malformed JSON: treat as a cache miss so a
            # single bad file cannot abort preload().
            return None

        if data.get("t", 0) < self.max_time:
            return None

        entry = LinkEntry.load(data)
        self.store[entry.url] = entry
        return entry

    def __contains__(self, url: str) -> bool:
        """Return whether `url` is in the store or has a cache file.

        The file's age is not checked here, so `cache[url]` may still
        return `None` for a URL that is reported as contained.
        """
        return url in self.store or (self.path / f"{hash_url(url)}.json").exists()

    def __getitem__(self, url: str) -> Optional[LinkEntry]:
        """Return the entry for `url`, loading it from disk if needed."""
        try:
            return self.store[url]
        except KeyError:
            return self.load(hash_url(url))

    def __setitem__(self, url: str, entry: LinkEntry) -> None:
        """Store `entry` under `url` and write it to the cache folder.

        A falsy `entry` (such as `None`) is kept in the store only.
        """
        self.store[url] = entry
        if entry:
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # encoding must be pinned rather than left to the locale.
            (self.path / f"{hash_url(url)}.json").write_text(
                ujson.dumps(
                    entry.json,
                    ensure_ascii=False,
                    escape_forward_slashes=False,
                ),
                encoding="utf-8",
            )

Global variables

var StrOrPath

str or Path.

var DEFAULT_PATH : Union[str, pathlib.Path]

Default cache folder.

var RE_DURATION

Pattern for time strings.

var DURATION_UNITS

Time string units.

Functions

def to_timedelta(duration: str) ‑> datetime.timedelta

Convert a time duration string into a timedelta.

Expand source code
def to_timedelta(duration: str) -> timedelta:
    """Convert a time duration string into a `timedelta`.

    Each part of the lower-cased string that matches `RE_DURATION` is added
    to the total of its unit (looked up in `DURATION_UNITS`).
    """
    totals = dict.fromkeys(("days", "hours", "minutes", "seconds"), 0.0)
    for amount, letter in RE_DURATION.findall(duration.lower()):
        totals[DURATION_UNITS.get(letter, "seconds")] += float(amount)
    return timedelta(**totals)
def age_time(age: str, start: Optional[datetime] = None) ‑> int

Return unix time relative to a start date.

Args

age : str
amount of time; a number followed by a time unit (s=seconds, m=minutes, h=hours, d=days). s is default.
start : datetime, optional
start date from which to subtract age. If None uses datetime.now(). Defaults to None.

Returns

int
unix time relative to start
Expand source code
def age_time(age: str, start: Optional[datetime] = None) -> int:
    """Return the unix time that lies `age` before a reference moment.

    Args:
        age (str): amount of time, parsed by `to_timedelta` (numbers
            followed by a unit: s=seconds, m=minutes, h=hours, d=days).

        start (datetime, optional): moment from which `age` is subtracted.
            When `None`, `datetime.now()` is used. Defaults to `None`.

    Returns:
        int: unix timestamp of `start - age`, truncated to whole seconds.
    """
    reference = datetime.now() if start is None else start
    moment = reference - to_timedelta(age)
    return int(moment.timestamp())
def hash_url(url: str) ‑> str

Return the hash for a given url.

Expand source code
def hash_url(url: str) -> str:
    """Return the hex-encoded SHA-256 digest of `url` (UTF-8 encoded)."""
    digest = sha256()
    digest.update(url.encode("utf-8"))
    return digest.hexdigest()

Classes

class LinkEntry (is_cached: bool = False, version: int = 1, time: int = 0, code: int = 200, err: str = '', url: str = '', links: List[str] = <factory>)

LinkEntry(is_cached: 'bool' = False, version: 'int' = 1, time: 'int' = 0, code: 'int' = 200, err: 'str' = '', url: 'str' = '', links: 'List[str]' = <factory>)

Expand source code
class LinkEntry:
    """Metadata recorded for a single URL."""

    is_cached: bool = False
    """Did this entry come from a cache?"""

    version: int = 1
    """Entry format version."""

    time: int = 0
    """Entry timestamp; `LinkCache` compares it against its `max_time`."""

    code: int = 200
    """Status code; 200, 301 and 302 count as `ok`."""

    err: str = ""
    """Error text; empty by default."""

    url: str = ""
    """URL this entry describes; `LinkCache` uses it as the store key."""

    links: List[str] = field(default_factory=list)
    """Links associated with the URL (presumably those found in its response)."""

    @staticmethod
    def load(data: Dict[str, Any]) -> LinkEntry:
        """Build an entry from its stored dictionary form.

        `data` must provide the keys `v`, `t`, `c`, `url` and `links`
        (a missing one raises `KeyError`); `err` is optional. The result
        is always flagged with `is_cached=True`.
        """
        kwargs = {
            "version": data["v"],
            "time": data["t"],
            "code": data["c"],
            "err": data.get("err", ""),
            "url": data["url"],
            "links": data["links"],
        }
        return LinkEntry(is_cached=True, **kwargs)

    @property
    def ok(self) -> bool:
        """Whether `code` is one of 200, 301 or 302."""
        return self.code in (200, 301, 302)

    @property
    def json(self) -> Dict[str, Any]:
        """Dictionary form of the entry, using the short keys `load` reads.

        `is_cached` is not part of the output.
        """
        return {
            "v": self.version,
            "t": self.time,
            "c": self.code,
            "err": self.err,
            "url": self.url,
            "links": self.links,
        }

Class variables

var is_cached : bool

Did this entry come from a cache?

var version : int

Entry format version.

var time : int
var code : int
var err : str
var url : str

Static methods

def load(data: Dict[str, Any]) ‑> LinkEntry
Expand source code
@staticmethod
def load(data: Dict[str, Any]) -> LinkEntry:
    """Build an entry from its stored dictionary form.

    `data` must provide the keys `v`, `t`, `c`, `url` and `links` (a missing
    one raises `KeyError`); `err` is optional. The result is always flagged
    with `is_cached=True`.
    """
    kwargs = {
        "version": data["v"],
        "time": data["t"],
        "code": data["c"],
        "err": data.get("err", ""),
        "url": data["url"],
        "links": data["links"],
    }
    return LinkEntry(is_cached=True, **kwargs)

Instance variables

var ok : bool
Expand source code
@property
def ok(self) -> bool:
    """Whether `code` is one of 200, 301 or 302."""
    return self.code in (200, 301, 302)
var json : Dict[str, Any]
Expand source code
@property
def json(self) -> Dict[str, Any]:
    """Dictionary form of the entry, using short keys.

    Maps `version`, `time` and `code` to `v`, `t` and `c`; `err`, `url`
    and `links` keep their names.
    """
    return {
        "v": self.version,
        "t": self.time,
        "c": self.code,
        "err": self.err,
        "url": self.url,
        "links": self.links,
    }
class LinkCache (store: Dict[str, LinkEntry], max_time: int = 0, path: StrOrPath = '~/.cache/freshlinks', preload: bool = False)

A file-based cache for storing metadata about URLs.

Expand source code
class LinkCache:
    """A file-based cache for storing metadata about URLs.

    Entries are kept in the in-memory `store`, keyed by URL, and mirrored on
    disk as `<hash_url(url)>.json` files inside `path`.
    """

    path: Path
    """Cache folder."""

    max_time: int = 0
    """Cache entries older than this are considered invalid."""

    def __init__(
        self,
        store: Dict[str, LinkEntry],
        max_time: int = 0,
        path: StrOrPath = DEFAULT_PATH,
        preload: bool = False,
    ):
        """Set up the cache and make sure its folder exists.

        Args:
            store (Dict[str, LinkEntry]): mapping that holds loaded entries;
                it is used as-is, not copied.
            max_time (int): oldest acceptable entry timestamp. A value that
                is not positive is replaced by the current unix time.
            path (StrOrPath): cache folder; a leading `~` is expanded.
            preload (bool): when true, load every cache file immediately.
        """
        self.store = store
        self.max_time = max_time if max_time > 0 else int(time())
        self.path = Path(path).expanduser()
        # parents=True: intermediate folders (e.g. ~/.cache) may not exist yet.
        self.path.mkdir(parents=True, exist_ok=True)
        if preload:
            self.preload()

    def preload(self) -> None:
        """Load every `*.json` file found in the cache folder."""
        for item in self.path.glob("*.json"):
            self.load(item.stem)

    def load(self, hashed: str) -> Optional[LinkEntry]:
        """Read the cache file named after `hashed` into the store.

        Returns:
            Optional[LinkEntry]: the loaded entry, or `None` when the file
            is missing, cannot be decoded or parsed, or is older than
            `max_time`.
        """
        path = self.path / f"{hashed}.json"
        if not path.exists():
            return None

        try:
            # Files are written as UTF-8 by __setitem__; read them the same way
            # instead of relying on the locale's default encoding.
            data = ujson.loads(path.read_text(encoding="utf-8"))
        except ValueError:
            # Undecodable text or malformed JSON: treat as a cache miss so a
            # single bad file cannot abort preload().
            return None

        if data.get("t", 0) < self.max_time:
            return None

        entry = LinkEntry.load(data)
        self.store[entry.url] = entry
        return entry

    def __contains__(self, url: str) -> bool:
        """Return whether `url` is in the store or has a cache file.

        The file's age is not checked here, so `cache[url]` may still
        return `None` for a URL that is reported as contained.
        """
        return url in self.store or (self.path / f"{hash_url(url)}.json").exists()

    def __getitem__(self, url: str) -> Optional[LinkEntry]:
        """Return the entry for `url`, loading it from disk if needed."""
        try:
            return self.store[url]
        except KeyError:
            return self.load(hash_url(url))

    def __setitem__(self, url: str, entry: LinkEntry) -> None:
        """Store `entry` under `url` and write it to the cache folder.

        A falsy `entry` (such as `None`) is kept in the store only.
        """
        self.store[url] = entry
        if entry:
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # encoding must be pinned rather than left to the locale.
            (self.path / f"{hash_url(url)}.json").write_text(
                ujson.dumps(
                    entry.json,
                    ensure_ascii=False,
                    escape_forward_slashes=False,
                ),
                encoding="utf-8",
            )

Class variables

var path : pathlib.Path

Cache folder.

var max_time : int

Cache entries older than this are considered invalid.

Methods

def preload(self) ‑> None
Expand source code
def preload(self) -> None:
    """Load every `*.json` file found in the cache folder."""
    stems = (cache_file.stem for cache_file in self.path.glob("*.json"))
    for stem in stems:
        self.load(stem)
def load(self, hashed: str) ‑> Optional[LinkEntry]
Expand source code
def load(self, hashed: str) -> Optional[LinkEntry]:
    """Read the cache file named after `hashed` into the store.

    Returns the loaded entry, or `None` when the file does not exist or its
    `t` value (0 when absent) is below `max_time`.
    """
    target = self.path / f"{hashed}.json"
    if not target.exists():
        return None

    payload = ujson.loads(target.read_text())
    is_stale = payload.get("t", 0) < self.max_time
    if is_stale:
        return None

    loaded = LinkEntry.load(payload)
    self.store[loaded.url] = loaded
    return loaded