diff --git a/mypy.ini b/mypy.ini index 5d066a81..a16c1393 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,14 +1,13 @@ [mypy] -namespace_packages = True pretty = True show_error_context = True -show_error_codes = True show_column_numbers = True show_error_end = True +warn_redundant_casts = True warn_unused_ignores = True check_untyped_defs = True -enable_error_code = possibly-undefined strict_equality = True +enable_error_code = possibly-undefined # not sure why mypy started discovering it (since 0.800??) [mypy-hypothesis] diff --git a/ruff.toml b/ruff.toml index 54f621c6..83736c39 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,4 +1,50 @@ +target-version = "py38" # NOTE: inferred from pyproject.toml if present + +lint.extend-select = [ + "F", # flakes rules -- default, but extend just in case + "E", # pycodestyle -- default, but extend just in case + "W", # various warnings + + "B", # 'bugbear' set -- various possible bugs + "C4", # flake8-comprehensions -- unnecessary list/map/dict calls + "COM", # trailing commas + "EXE", # various checks wrt executable files + # "I", # sort imports + "ICN", # various import conventions + "FBT", # detect use of boolean arguments + "FURB", # various rules + "PERF", # various potential performance speedups + "PD", # pandas rules + "PIE", # 'misc' lints + "PLC", # pylint convention rules + "PLR", # pylint refactor rules + "PLW", # pylint warnings + "PT", # pytest stuff + "PYI", # various type hinting rules + "RET", # early returns + "RUF", # various ruff-specific rules + "TID", # various import suggestions + "TRY", # various exception handling rules + "UP", # detect deprecated python stdlib stuff + "FA", # suggest using from __future__ import annotations + "PTH", # pathlib migration + "ARG", # unused argument checks + "A", # builtin shadowing + # "EM", # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying + + # "ALL", # uncomment this to check for new rules! +] + lint.ignore = [ + "D", # annoying nags about docstrings + "N", # pep naming + "TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING + "S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks + "DTZ", # datetimes checks -- complaining about missing tz and mostly false positives + "FIX", # complains about fixmes/todos -- annoying + "TD", # complains about todo formatting -- too annoying + "ANN", # missing type annotations? seems way too strict though + ### too opinionated style checks "E501", # too long lines "E702", # Multiple statements on one line (semicolon) @@ -17,9 +63,84 @@ lint.ignore = [ "E402", # Module level import not at top of file ### maybe consider these soon -# sometimes it's useful to give a variable a name even if we don't use it as a documentation -# on the other hand, often is a sign of error + # sometimes it's useful to give a variable a name even if we don't use it, as documentation + # on the other hand, often it's a sign of an error "F841", # Local variable `count` is assigned to but never used - "F401", # imported but unused ### + + "RUF100", # unused noqa -- handle later + "RUF012", # mutable class attrs should be annotated with ClassVar... 
ugh pretty annoying for user configs + +### these are just nitpicky, we usually know better + "PLR0911", # too many return statements + "PLR0912", # too many branches + "PLR0913", # too many function arguments + "PLR0915", # too many statements + "PLR1714", # consider merging multiple comparisons + "PLR2044", # line with empty comment + "PLR5501", # use elif instead of else if + "PLR2004", # magic value in comparison -- super annoying in tests +### + "PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check + + "B009", # calling getattr with constant attribute -- this is useful to convince mypy + "B010", # same as above, but setattr + "B011", # complains about assert False + "B017", # pytest.raises(Exception) + "B023", # seems to result in false positives? + "B028", # suggest using explicit stacklevel? TODO double check later, but not sure it's useful + + # complains about useless pass, but has sort of a false positive if the function has a docstring? + # this is common for click entrypoints (e.g. in __main__), so disable + "PIE790", + + # a bit too annoying, offers to convert for loops to list comprehension + # , which may hurt readability + "PERF401", + + # suggests not using exceptions in for loops + # we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost" + "PERF203", + + "RET504", # unnecessary assignment before returning -- that can be useful for readability + "RET505", # unnecessary else after return -- can hurt readability + + "PLW0603", # global variable update.. we usually know why we are doing this + "PLW2901", # for loop variable overwritten, usually this is intentional + + "PT004", # deprecated rule, will be removed later + "PT011", # pytest raises is too broad + "PT012", # pytest raises should contain a single statement + + "COM812", # trailing comma missing -- mostly just being annoying with long multiline strings + + "PD901", # generic variable name df + + "TRY003", # suggests defining exception messages in exception class -- kinda annoying + "TRY004", # prefer TypeError -- don't see the point + "TRY201", # raise without specifying exception name -- sometimes hurts readability + "TRY400", # TODO double check this, might be useful + "TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging + + "PGH", # TODO force error code in mypy instead + + "TID252", # Prefer absolute imports over relative imports from parent modules + + "UP038", # suggests using | (union) in isinstance checks.. 
but it results in slower code + + ## too annoying + "T20", # just complains about prints and pprints + "Q", # flake quotes, too annoying + "C90", # some complexity checking + "G004", # logging statement uses f string + "ERA001", # commented out code + "SLF001", # private member accessed + "BLE001", # do not catch 'blind' Exception + "INP001", # complains about implicit namespace packages + "SIM", # some if statements crap + "RSE102", # complains about missing parens in exceptions + ## + + "ARG001", # ugh, kinda annoying when using pytest fixtures + "RUF001", "RUF002", "RUF003", # spams about non-latin characters that we do use for testing ] diff --git a/setup.py b/setup.py index 50496224..fbc57489 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ def main() -> None: 'appdirs', # for portable user directories detection 'tzlocal', 'more_itertools', + 'typing-extensions', 'pytz', 'sqlalchemy>=2.0', # DB api diff --git a/src/promnesia/__init__.py b/src/promnesia/__init__.py index b64e0224..ca91df62 100644 --- a/src/promnesia/__init__.py +++ b/src/promnesia/__init__.py @@ -1,6 +1,7 @@ -from pathlib import Path -from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res +from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res # noqa: F401 # add deprecation warning so eventually this may converted to a namespace package? import warnings + +# TODO think again about it -- what are the pros and cons? warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning) diff --git a/src/promnesia/__main__.py b/src/promnesia/__main__.py index 537e017a..bfdfce7e 100644 --- a/src/promnesia/__main__.py +++ b/src/promnesia/__main__.py @@ -6,11 +6,12 @@ import inspect import os from pathlib import Path +import shlex import shutil from subprocess import run, check_call, Popen import sys from tempfile import TemporaryDirectory, gettempdir -from typing import Callable, Sequence, Iterable, Iterator, Union +from typing import Callable, Sequence, Iterable, Iterator from . import config @@ -22,7 +23,7 @@ from .extract import extract_visits -def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Res[DbVisit]]: +def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]: cfg = config.get() output_dir = cfg.output_dir # not sure if belongs here?? @@ -74,7 +75,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset)) -def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db: bool=False) -> Iterable[Exception]: +def _do_index(*, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False) -> Iterable[Exception]: # also keep & return errors for further display errors: list[Exception] = [] def it() -> Iterable[Res[DbVisit]]: @@ -98,9 +99,10 @@ def it() -> Iterable[Res[DbVisit]]: def do_index( config_file: Path, - dry: bool=False, - sources_subset: Iterable[Union[str, int]]=(), - overwrite_db: bool=False, + *, + dry: bool = False, + sources_subset: Iterable[str | int] = (), + overwrite_db: bool = False, ) -> Sequence[Exception]: config.load_from(config_file) # meh.. 
should be cleaner try: @@ -120,7 +122,8 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]: def lazy(name: str) -> Callable[[], Extractor]: # helper to avoid failed imports etc, since people might be lacking necessary dependencies def inner() -> Extractor: - from . import sources + # TODO why this import?? + from . import sources # noqa: F401 module = importlib.import_module(f'promnesia.sources.{name}') return getattr(module, 'index') return inner @@ -145,7 +148,7 @@ def do_demo( config_file: Path | None, dry: bool=False, name: str='demo', - sources_subset: Iterable[Union[str, int]]=(), + sources_subset: Iterable[str | int]=(), overwrite_db: bool=False, ) -> None: with TemporaryDirectory() as tdir: @@ -219,9 +222,10 @@ def _config_check(cfg: Path) -> Iterable[Exception]: logger.info('config: %s', cfg) def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]: - logger.debug(' '.join(map(str, cmd))) - res = run(cmd, **kwargs) + logger.debug(shlex.join(map(str, cmd))) + res = run(cmd, **kwargs) # noqa: PLW1510 if res.returncode > 0: + # TODO what's up with empty exception?? yield Exception() logger.info('Checking syntax...') @@ -239,7 +243,7 @@ def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]: # todo not sure if should be more defensive than check_call here logger.info('Checking type safety...') try: - import mypy + import mypy # noqa: F401 except ImportError: logger.warning("mypy not found, can't use it to check config!") else: @@ -291,7 +295,7 @@ def cli_doctor_server(args: argparse.Namespace) -> None: logger.info('You should see the database path and version above!') -def _ordinal_or_name(s: str) -> Union[str, int]: +def _ordinal_or_name(s: str) -> str | int: try: s = int(s) # type: ignore except ValueError: @@ -328,7 +332,7 @@ def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120) p = argparse.ArgumentParser(formatter_class=F) - subp = p.add_subparsers(dest='mode', ) + subp = p.add_subparsers(dest='mode' ) ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F) add_index_args(ep, default_config_path()) # TODO use some way to override or provide config only via cmdline? 
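(A remark on the `shlex.join` change in `_config_check` above: unlike a plain `' '.join`, it shell-quotes each argument, so a logged command containing spaces can be pasted back into a terminal unambiguously. A minimal sketch -- the command below is made up for illustration:

    import shlex

    cmd = ['python3', '-m', 'mypy', '/home/user/my config.py']
    print(' '.join(cmd))    # python3 -m mypy /home/user/my config.py   -- ambiguous
    print(shlex.join(cmd))  # python3 -m mypy '/home/user/my config.py' -- copy-pasteable

`shlex.join` was added in Python 3.8, so it matches the `target-version = "py38"` declared in ruff.toml.)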
@@ -348,7 +352,7 @@ def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server') ap.add_argument( '--as', - choices=list(sorted(demo_sources().keys())), + choices=sorted(demo_sources().keys()), default='guess', help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)', ) @@ -359,7 +363,7 @@ def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh install_server.setup_parser(isp) cp = subp.add_parser('config', help='Config management') - cp.set_defaults(func=lambda *args: cp.print_help()) + cp.set_defaults(func=lambda *_args: cp.print_help()) scp = cp.add_subparsers() ccp = scp.add_parser('check', help='Check config') ccp.set_defaults(func=config_check) @@ -373,7 +377,7 @@ def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh dp = subp.add_parser('doctor', help='Troubleshooting assistant') dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path') - dp.set_defaults(func=lambda *args: dp.print_help()) + dp.set_defaults(func=lambda *_args: dp.print_help()) sdp = dp.add_subparsers() sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check ) sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db) diff --git a/src/promnesia/cannon.py b/src/promnesia/cannon.py index db412e69..daf856db 100755 --- a/src/promnesia/cannon.py +++ b/src/promnesia/cannon.py @@ -9,15 +9,16 @@ Also some experiments to establish 'URL hierarchy'. """ +from __future__ import annotations # TODO eh?? they fixed mobile.twitter.com? from itertools import chain import re import typing -from typing import Iterable, NamedTuple, Set, Optional, List, Sequence, Union, Tuple, Dict, Any, Collection +from typing import Iterable, NamedTuple, Sequence, Union, Tuple, Any, Collection import urllib.parse -from urllib.parse import urlsplit, parse_qsl, urlunsplit, parse_qs, urlencode, SplitResult +from urllib.parse import urlsplit, parse_qsl, urlunsplit, urlencode, SplitResult # this has some benchmark, but quite a few libraries seem unmaintained, sadly @@ -108,11 +109,11 @@ def canonify_domain(dom: str) -> str: # TODO perhaps, decide if fragment is meaningful (e.g. wiki) or random sequence of letters? class Spec(NamedTuple): - qkeep : Optional[Union[Collection[str], bool]] = None - qremove: Optional[Set[str]] = None + qkeep : Collection[str] | bool | None = None + qremove: set[str] | None = None fkeep : bool = False - def keep_query(self, q: str) -> Optional[int]: # returns order + def keep_query(self, q: str) -> int | None: # returns order if self.qkeep is True: return 1 qkeep = { @@ -134,13 +135,13 @@ def keep_query(self, q: str) -> Optional[int]: # returns order return None @classmethod - def make(cls, **kwargs) -> 'Spec': + def make(cls, **kwargs) -> Spec: return cls(**kwargs) S = Spec # TODO perhaps these can be machine learnt from large set of urls? -specs: Dict[str, Spec] = { +specs: dict[str, Spec] = { 'youtube.com': S( # TODO search_query? qkeep=[ # note: experimental.. 
order matters here @@ -178,7 +179,6 @@ def make(cls, **kwargs) -> 'Spec': 'source', 'tsid', 'refsrc', 'pnref', 'rc', '_rdr', 'src', 'hc_location', 'section', 'permPage', 'soft', 'pn_ref', 'action', 'ti', 'aref', 'event_time_id', 'action_history', 'filter', 'ref_notif_type', 'has_source', 'source_newsfeed_story_type', - 'ref_notif_type', }, ), 'physicstravelguide.com': S(fkeep=True), # TODO instead, pass fkeep marker object for shorter spec? @@ -221,7 +221,7 @@ def get_spec(dom: str) -> Spec: Parts = Sequence[Tuple[str, str]] -def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts, Frag]: +def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> tuple[Any, Any, Parts, Frag]: if path[:5] == '/from': site = dict(qq).get('site') if site is not None: @@ -232,7 +232,7 @@ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts, # TODO this should be in-place? for brevity? return (domain, path, qq, frag) -def get_spec2(dom: str) -> Optional[Spec2]: +def get_spec2(dom: str) -> Spec2 | None: return { 'news.ycombinator.com': _yc, }.get(dom) @@ -288,7 +288,7 @@ def transform_split(split: SplitResult): Right = Tuple[str, str, str] # the idea is that we can unify certain URLs here and map them to the 'canonical' one # this is a dict only for grouping but should be a list really.. todo - rules: Dict[Left, Right] = { + rules: dict[Left, Right] = { # TODO m. handling might be quite common # f'm.youtube.com/{REST}': ('youtube.com', '{rest}'), ( @@ -322,9 +322,9 @@ def iter_rules(): continue gd = m.groupdict() if len(to) == 2: - to = to + ('', ) + to = (*to, '') - (netloc, path, qq) = [t.format(**gd) for t in to] + (netloc, path, qq) = (t.format(**gd) for t in to) qparts.extend(parse_qsl(qq, keep_blank_values=True)) # TODO hacky.. # TODO eh, qparts should really be a map or something... break @@ -361,7 +361,7 @@ def myunsplit(domain: str, path: str, query: str, fragment: str) -> str: # ] # for re in regexes: -def handle_archive_org(url: str) -> Optional[str]: +def handle_archive_org(url: str) -> str | None: are = r'web.archive.org/web/(?P<timestamp>\d+)/(?P<rest>.*)' m = re.fullmatch(are, url) if m is None: @@ -697,8 +697,8 @@ def groups(it, args): # pragma: no cover all_pats = get_patterns() from collections import Counter - c: typing.Counter[Optional[str]] = Counter() - unmatched: List[str] = [] + c: typing.Counter[str | None] = Counter() + unmatched: list[str] = [] def dump(): print(c) diff --git a/src/promnesia/common.py b/src/promnesia/common.py index f6adbc8b..a978f322 100644 --- a/src/promnesia/common.py +++ b/src/promnesia/common.py @@ -12,7 +12,7 @@ from subprocess import run, PIPE, Popen from timeit import default_timer as timer from types import ModuleType -from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar +from typing import NamedTuple, Iterable, TypeVar, Callable, Union, TypeVar, Sequence, Optional import warnings from more_itertools import intersperse @@ -38,14 +38,14 @@ # TODO hmm. arguably, source and context are almost same things... 
class Loc(NamedTuple): title: str - href: Optional[str]=None + href: Optional[str] = None # noqa: UP007 # looks like hypothesis doesn't like it on python <= 3.9 @classmethod - def make(cls, title: str, href: Optional[str]=None) -> 'Loc': + def make(cls, title: str, href: str | None=None) -> Loc: return cls(title=title, href=href) @classmethod - def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc': + def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc: lstr = '' if line is None else f':{line}' # todo loc should be url encoded? dunno. # or use line=? eh. I don't know. Just ask in issues. @@ -94,7 +94,7 @@ def _warn_no_xdg_mime() -> None: def _detect_mime_handler() -> str: def exists(what: str) -> bool: try: - r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE) + r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False) except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason _warn_no_xdg_mime() return False @@ -139,12 +139,12 @@ class Visit(NamedTuple): # TODO back to DatetimeIsh, but somehow make compatible to dbcache? dt: datetime locator: Loc - context: Optional[Context] = None - duration: Optional[Second] = None + context: Context | None = None + duration: Second | None = None # TODO shit. I need to insert it in chrome db.... # TODO gonna be hard to fill retroactively. # spent: Optional[Second] = None - debug: Optional[str] = None + debug: str | None = None Result = Union[Visit, Exception] Results = Iterable[Result] @@ -157,12 +157,12 @@ class DbVisit(NamedTuple): orig_url: Url dt: datetime locator: Loc - src: Optional[SourceName] = None - context: Optional[Context] = None - duration: Optional[Second] = None + src: Optional[SourceName] = None # noqa: UP007 # looks like hypothesis doesn't like it on python <= 3.9 + context: Optional[Context] = None # noqa: UP007 # looks like hypothesis doesn't like it on python <= 3.9 + duration: Optional[Second] = None # noqa: UP007 # looks like hypothesis doesn't like it on python <= 3.9 @staticmethod - def make(p: Visit, src: SourceName) -> Res['DbVisit']: + def make(p: Visit, src: SourceName) -> Res[DbVisit]: try: # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime) if isinstance(p.dt, datetime): @@ -171,7 +171,7 @@ def make(p: Visit, src: SourceName) -> Res['DbVisit']: # TODO that won't be with timezone.. dt = datetime.combine(p.dt, datetime.min.time()) # meh.. else: - raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}') + raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301 except Exception as e: return e @@ -249,7 +249,7 @@ def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]: yield _sanitize(u) -def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]: +def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]: return list(iter_urls(s=s, syntax=syntax)) @@ -274,7 +274,7 @@ class PathWithMtime(NamedTuple): mtime: float @classmethod - def make(cls, p: Path) -> 'PathWithMtime': + def make(cls, p: Path) -> PathWithMtime: return cls( path=p, mtime=p.stat().st_mtime, @@ -362,11 +362,11 @@ def src(self) -> str: # NOTE: used in configs... 
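(A note on the repeated `noqa: UP007` above: `from __future__ import annotations` only makes annotations lazy strings. Anything that resolves them at runtime -- `typing.get_type_hints`, and presumably hypothesis when it builds `DbVisit`s for tests -- re-evaluates those strings, and a PEP 604 union like `str | None` needs `types.UnionType`, which only exists from Python 3.10. A sketch of the failure mode, assuming Python <= 3.9:

    from __future__ import annotations
    from typing import NamedTuple, get_type_hints

    class V(NamedTuple):
        href: str | None = None  # fine to *define*: stored as the string "str | None"

    # on 3.9 and older this raises TypeError: unsupported operand type(s) for |,
    # because evaluating "str | None" requires types.UnionType (new in 3.10)
    hints = get_type_hints(V)

Hence `Optional[...]` is kept for exactly the fields that get introspected at runtime.)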
def last(path: PathIsh, *parts: str) -> Path: import os.path - pp = os.path.join(str(path), *parts) - return Path(max(glob(pp, recursive=True))) + pp = os.path.join(str(path), *parts) # noqa: PTH118 + return Path(max(glob(pp, recursive=True))) # noqa: PTH207 -from .logging import setup_logger +from .logging import setup_logger # noqa: F401 from copy import copy def echain(ex: Exception, cause: Exception) -> Exception: @@ -409,13 +409,13 @@ def default_cache_dir() -> Path: # make it lazy, otherwise it might crash on module import (e.g. on Windows) # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows @lru_cache(1) -def _magic() -> Callable[[PathIsh], Optional[str]]: +def _magic() -> Callable[[PathIsh], str | None]: logger = get_logger() try: import magic # type: ignore except Exception as e: logger.exception(e) - defensive_msg: Optional[str] = None + defensive_msg: str | None = None if isinstance(e, ModuleNotFoundError) and e.name == 'magic': defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation" elif isinstance(e, ImportError): @@ -425,7 +425,7 @@ def _magic() -> Callable[[PathIsh], Optional[str]]: if defensive_msg is not None: logger.warning(defensive_msg) warnings.warn(defensive_msg) - return lambda path: None # stub + return lambda path: None # stub # noqa: ARG005 else: raise e else: @@ -441,7 +441,7 @@ def _mimetypes(): return mimetypes -def mime(path: PathIsh) -> Optional[str]: +def mime(path: PathIsh) -> str | None: ps = str(path) mimetypes = _mimetypes() # first try mimetypes, it's only using the filename without opening the file @@ -453,7 +453,7 @@ def mime(path: PathIsh) -> Optional[str]: return magic(ps) -def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]: +def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]: prune_dir_args = [] ignore_file_args = [] if ignore: @@ -476,7 +476,7 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]: ] -def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]: +def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]: from .config import extra_fd_args ignore_args = [] @@ -496,7 +496,7 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]: ] -def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]: +def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]: if not root.is_dir(): yield root return diff --git a/src/promnesia/compare.py b/src/promnesia/compare.py old mode 100755 new mode 100644 index d3e0dc47..54028030 --- a/src/promnesia/compare.py +++ b/src/promnesia/compare.py @@ -1,10 +1,12 @@ -#!/usr/bin/env python3 +from __future__ import annotations + # TODO perhaps make it external script? import argparse from pathlib import Path import logging import sys -from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple +from typing import Iterator +from typing import TypeVar, Sequence from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath @@ -19,14 +21,11 @@ def get_logger(): # TODO return error depending on severity? 
-from typing import TypeVar, Sequence - - T = TypeVar('T') def eliminate_by(sa: Sequence[T], sb: Sequence[T], key): - def make_dict(s: Sequence[T]) -> Dict[str, List[T]]: - res: Dict[str, List[T]] = {} + def make_dict(s: Sequence[T]) -> dict[str, list[T]]: + res: dict[str, list[T]] = {} for a in s: k = key(a) ll = res.get(k, None) @@ -39,9 +38,9 @@ def make_dict(s: Sequence[T]) -> Dict[str, List[T]]: db = make_dict(sb) ka = set(da.keys()) kb = set(db.keys()) - onlya: Set[T] = set() - common: Set[T] = set() - onlyb: Set[T] = set() + onlya: set[T] = set() + common: set[T] = set() + onlyb: set[T] = set() for k in ka.union(kb): la = da.get(k, []) lb = db.get(k, []) @@ -54,13 +53,13 @@ def make_dict(s: Sequence[T]) -> Dict[str, List[T]]: return onlya, common, onlyb -def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]: +def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]: logger = get_logger() logger.info('comparing between: %s', between) - errors: List[DbVisit] = [] + errors: list[DbVisit] = [] - umap: Dict[Url, List[DbVisit]] = {} + umap: dict[Url, list[DbVisit]] = {} for a in after: url = a.norm_url xx = umap.get(url, []) # TODO canonify here? @@ -71,7 +70,7 @@ def reg_error(b): errors.append(b) if log: logger.error('between %s missing %s', between, b) - print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr) + print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr) # the idea is that we eliminate items simultaneously from both sets @@ -108,7 +107,7 @@ def get_files(args): if len(args.paths) == 0: int_dir = args.intermediate_dir assert int_dir.exists() - files = list(sorted(int_dir.glob('*.sqlite*'))) + files = sorted(int_dir.glob('*.sqlite*')) files = files[-args.last:] else: files = [Path(p) for p in args.paths] @@ -126,7 +125,7 @@ def main(): sys.exit(1) -def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]: +def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]: assert len(files) > 0 logger = get_logger() diff --git a/src/promnesia/compat.py b/src/promnesia/compat.py index 50611f0c..459d1f41 100644 --- a/src/promnesia/compat.py +++ b/src/promnesia/compat.py @@ -1,12 +1,16 @@ -## we used to have compat fixes here for these for python3.7 -## keeping in case any sources depended on compat functions -from subprocess import PIPE, run, check_call, check_output, Popen -from typing import Protocol, Literal -## +from typing import TYPE_CHECKING -# can remove after python3.9 +if not TYPE_CHECKING: + ## we used to have compat fixes here for these for python3.7 + ## keeping in case any sources depended on compat functions + from subprocess import PIPE, run, check_call, check_output, Popen # noqa: F401 + from typing import Protocol, Literal # noqa: F401 + ## + + +# can deprecate after python3.9 def removeprefix(text: str, prefix: str) -> str: if text.startswith(prefix): return text[len(prefix):] - return text \ No newline at end of file + return text diff --git a/src/promnesia/config.py b/src/promnesia/config.py index 7bc51ec1..b9a56827 100644 --- a/src/promnesia/config.py +++ b/src/promnesia/config.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from pathlib import Path import os from types import ModuleType -from typing import List, Optional, Union, NamedTuple, Iterable, Callable +from typing import Union, NamedTuple, Iterable, Callable import importlib import importlib.util import warnings @@ -13,9 
+15,6 @@ HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]] -from typing import Any - - ModuleName = str # something that can be converted into a proper Source @@ -24,19 +23,19 @@ class Config(NamedTuple): # TODO remove default from sources once migrated - SOURCES: List[ConfigSource] = [] + SOURCES: list[ConfigSource] = [] # if not specified, uses user data dir - OUTPUT_DIR: Optional[PathIsh] = None + OUTPUT_DIR: PathIsh | None = None - CACHE_DIR: Optional[PathIsh] = '' - FILTERS: List[str] = [] + CACHE_DIR: PathIsh | None = '' + FILTERS: list[str] = [] - HOOK: Optional[HookT] = None + HOOK: HookT | None = None # # NOTE: INDEXERS is deprecated, use SOURCES instead - INDEXERS: List[ConfigSource] = [] + INDEXERS: list[ConfigSource] = [] #MIME_HANDLER: Optional[str] = None # TODO @property @@ -68,11 +67,11 @@ def sources(self) -> Iterable[Res[Source]]: yield Source(r) @property - def cache_dir(self) -> Optional[Path]: + def cache_dir(self) -> Path | None: # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to configure this # keeping just in case for now cd = self.CACHE_DIR - cpath: Optional[Path] + cpath: Path | None if cd is None: cpath = None # means 'disabled' in cachew elif cd == '': # meh.. but need to make it None friendly.. @@ -96,10 +95,10 @@ def db(self) -> Path: return self.output_dir / 'promnesia.sqlite' @property - def hook(self) -> Optional[HookT]: + def hook(self) -> HookT | None: return self.HOOK -instance: Optional[Config] = None +instance: Config | None = None def has() -> bool: @@ -139,7 +138,7 @@ def import_config(config_file: PathIsh) -> Config: # TODO: ugh. this causes warnings to be repeated multiple times... need to reuse the pool or something.. -def use_cores() -> Optional[int]: +def use_cores() -> int | None: ''' Somewhat experimental. For now only used in sources.auto, perhaps later will be shared among the other indexers. @@ -154,7 +153,7 @@ def use_cores() -> Optional[int]: return 0 -def extra_fd_args() -> List[str]: +def extra_fd_args() -> list[str]: ''' Not sure where it belongs yet... so via env variable for now Can be used to pass --ignore-file parameter diff --git a/src/promnesia/database/common.py b/src/promnesia/database/common.py index 09e30ed2..bb132ea7 100644 --- a/src/promnesia/database/common.py +++ b/src/promnesia/database/common.py @@ -1,10 +1,11 @@ +from __future__ import annotations + from datetime import datetime -from typing import Sequence, Tuple +from typing import Sequence from sqlalchemy import ( Column, Integer, - Row, String, ) @@ -30,7 +31,7 @@ def get_columns() -> Sequence[Column]: return res -def db_visit_to_row(v: DbVisit) -> Tuple: +def db_visit_to_row(v: DbVisit) -> tuple: # ugh, very hacky... 
# we want to make sure the resulting tuple only consists of simple types # so we can use dbengine directly diff --git a/src/promnesia/database/dump.py b/src/promnesia/database/dump.py index 05832efd..1007c550 100644 --- a/src/promnesia/database/dump.py +++ b/src/promnesia/database/dump.py @@ -1,6 +1,8 @@ +from __future__ import annotations + from pathlib import Path import sqlite3 -from typing import Dict, Iterable, List, Optional, Set +from typing import Dict, Iterable, Optional from more_itertools import chunked @@ -58,8 +60,8 @@ def visits_to_sqlite( vit: Iterable[Res[DbVisit]], *, overwrite_db: bool, - _db_path: Optional[Path] = None, # only used in tests -) -> List[Exception]: + _db_path: Path | None = None, # only used in tests +) -> list[Exception]: if _db_path is None: db_path = config.get().db else: @@ -95,7 +97,7 @@ def vit_ok() -> Iterable[DbVisit]: def query_total_stats(conn) -> Stats: query = select(table.c.src, func.count(table.c.src)).select_from(table).group_by(table.c.src) - return {src: cnt for (src, cnt) in conn.execute(query).all()} + return dict(conn.execute(query).all()) def get_engine(*args, **kwargs) -> Engine: # kwargs['echo'] = True # useful for debugging @@ -122,7 +124,7 @@ def get_engine(*args, **kwargs) -> Engine: # (note that this also requires WAL mode) engine = get_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS}) - cleared: Set[str] = set() + cleared: set[str] = set() # by default, sqlalchemy does some sort of BEGIN (implicit) transaction, which doesn't provide proper isolation?? # see https://docs.sqlalchemy.org/en/20/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl @@ -144,7 +146,7 @@ def get_engine(*args, **kwargs) -> Engine: insert_stmt_raw = str(insert_stmt.compile(dialect=dialect_sqlite.dialect(paramstyle='qmark'))) for chunk in chunked(vit_ok(), n=_CHUNK_BY): - srcs = set(v.src or '' for v in chunk) + srcs = {v.src or '' for v in chunk} new = srcs.difference(cleared) for src in new: @@ -181,7 +183,7 @@ def get_engine(*args, **kwargs) -> Engine: for k, v in stats_changes.items(): logger.info(f'database stats changes: {k} {v}') - res: List[Exception] = [] + res: list[Exception] = [] if total_ok == 0: res.append(RuntimeError('No visits were indexed, something is probably wrong!')) return res diff --git a/src/promnesia/database/load.py b/src/promnesia/database/load.py index 25f80e40..d58e48ec 100644 --- a/src/promnesia/database/load.py +++ b/src/promnesia/database/load.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from pathlib import Path -from typing import Tuple, List +from typing import Tuple from sqlalchemy import ( create_engine, @@ -39,7 +41,7 @@ def get_db_stuff(db_path: Path) -> DbStuff: return engine, table -def get_all_db_visits(db_path: Path) -> List[DbVisit]: +def get_all_db_visits(db_path: Path) -> list[DbVisit]: # NOTE: this is pretty inefficient if the DB is huge # mostly intended for tests engine, table = get_db_stuff(db_path) diff --git a/src/promnesia/extract.py b/src/promnesia/extract.py index 17dddb81..fc062260 100644 --- a/src/promnesia/extract.py +++ b/src/promnesia/extract.py @@ -1,7 +1,8 @@ +from __future__ import annotations + from functools import lru_cache import re -import traceback -from typing import Set, Iterable, Sequence, Union +from typing import Iterable, Sequence from .cannon import CanonifyException from .common import ( @@ -11,7 +12,7 @@ SourceName, Source, Filter, Url, - Results, Extractor, + Results, ) @@ -53,7 +54,7 @@ def 
extract_visits(source: Source, *, src: SourceName) -> Iterable[Res[DbVisit]] yield e return - handled: Set[Visit] = set() + handled: set[Visit] = set() try: for p in vit: if isinstance(p, Exception): @@ -94,7 +95,7 @@ def filtered(url: Url) -> bool: return any(f(url) for f in filters()) -def make_filter(thing: Union[str, Filter]) -> Filter: +def make_filter(thing: str | Filter) -> Filter: if isinstance(thing, str): rc = re.compile(thing) def filter_(u: str) -> bool: diff --git a/src/promnesia/kjson.py b/src/promnesia/kjson.py index 95848c26..8426918f 100644 --- a/src/promnesia/kjson.py +++ b/src/promnesia/kjson.py @@ -3,6 +3,7 @@ This is a bit overengineered and I admit it! I'll make it more readable, but in the meantime feel free to open an issue if you're confused about something. """ +from __future__ import annotations from typing import Any, Dict, List, Union, Tuple, cast @@ -36,7 +37,7 @@ def do_dict(self, js: JDict, jp: JPath) -> None: if res is self.SKIP: return for k, v in js.items(): - path = cast(JPath, jp + ((js, k), )) + path = cast(JPath, jp + ((js, k), )) # noqa: RUF005 self._do(v, path) def do_list(self, js: JList, jp: JPath) -> None: @@ -45,7 +46,7 @@ def do_list(self, js: JList, jp: JPath) -> None: if res is self.SKIP: return for i, x in enumerate(js): - path = cast(JPath, jp + ((js, i), )) + path = cast(JPath, jp + ((js, i), )) # noqa: RUF005 self._do(x, path) def _do(self, js: Json, path: JPath) -> None: @@ -65,7 +66,7 @@ def run(self, js: Json) -> None: self._do(js, path) @classmethod - def kpath(cls, path: JPath) -> Tuple[JPathPart, ...]: + def kpath(cls, path: JPath) -> tuple[JPathPart, ...]: return tuple(x[1] for x in path) # type: ignore # TODO path is a sequence of jsons and keys? @@ -73,9 +74,10 @@ def kpath(cls, path: JPath) -> Tuple[JPathPart, ...]: def test_json_processor(): handled = [] class Proc(JsonProcessor): - def handle_dict(self, value: JDict, path): + def handle_dict(self, value: JDict, path): # noqa: ARG002 if 'skipme' in self.kpath(path): # type: ignore[comparison-overlap] return JsonProcessor.SKIP + return None def handle_str(self, value: str, path): if 'http' in value: diff --git a/src/promnesia/misc/install_server.py b/src/promnesia/misc/install_server.py index cf56da17..f5f78d39 100644 --- a/src/promnesia/misc/install_server.py +++ b/src/promnesia/misc/install_server.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 from __future__ import annotations import argparse @@ -9,7 +8,6 @@ import platform import shutil from subprocess import check_call, run -from typing import List SYSTEM = platform.system() UNSUPPORTED_SYSTEM = RuntimeError(f'Platform {SYSTEM} is not supported yet!') @@ -59,7 +57,7 @@ def systemd(*args: str | Path, method=check_call) -> None: ]) -def install_systemd(name: str, out: Path, launcher: str, largs: List[str]) -> None: +def install_systemd(name: str, out: Path, launcher: str, largs: list[str]) -> None: unit_name = name import shlex @@ -81,7 +79,7 @@ def install_systemd(name: str, out: Path, launcher: str, largs: List[str]) -> No raise e -def install_launchd(name: str, out: Path, launcher: str, largs: List[str]) -> None: +def install_launchd(name: str, out: Path, launcher: str, largs: list[str]) -> None: service_name = name arguments = '\n'.join(f'<string>{a}</string>' for a in [launcher, *largs]) out.write_text(LAUNCHD_TEMPLATE.format( diff --git a/src/promnesia/server.py b/src/promnesia/server.py index 707691e5..0557bd33 100644 --- a/src/promnesia/server.py +++ b/src/promnesia/server.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 from __future__ import 
annotations import argparse @@ -10,7 +9,7 @@ import logging import os from pathlib import Path -from typing import List, NamedTuple, Dict, Optional, Any, Tuple, Protocol +from typing import List, NamedTuple, Dict, Optional, Any, Protocol import pytz @@ -24,7 +23,7 @@ from sqlalchemy.sql import text -from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz +from .common import PathWithMtime, DbVisit, setup_logger, default_output_dir, get_system_tz from .cannon import canonify from .database.load import DbStuff, get_db_stuff, row_to_db_visit @@ -65,7 +64,7 @@ def as_str(self) -> str: }) @classmethod - def from_str(cls, cfgs: str) -> 'ServerConfig': + def from_str(cls, cfgs: str) -> ServerConfig: d = json.loads(cfgs) return cls( db =Path (d['db']), @@ -111,7 +110,7 @@ def as_json(v: DbVisit) -> Json: } -def get_db_path(check: bool=True) -> Path: +def get_db_path(*, check: bool=True) -> Path: db = EnvConfig.get().db if check: assert db.exists(), db @@ -125,7 +124,7 @@ def _get_stuff(db_path: PathWithMtime) -> DbStuff: return get_db_stuff(db_path=db_path.path) -def get_stuff(db_path: Optional[Path]=None) -> DbStuff: # TODO better name +def get_stuff(db_path: Path | None=None) -> DbStuff: # TODO better name # ok, it will always load from the same db file; but intermediate would be kinda an optional dump. if db_path is None: db_path = get_db_path() @@ -136,7 +135,7 @@ def db_stats(db_path: Path) -> Json: engine, table = get_stuff(db_path) query = select(func.count()).select_from(table) with engine.connect() as conn: - total = list(conn.execute(query))[0][0] + [(total,)] = conn.execute(query) return { 'total_visits': total, } @@ -172,7 +171,7 @@ def search_common(url: str, where: Where) -> VisitsResponse: with engine.connect() as conn: try: # TODO make more defensive here - visits: List[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)] + visits: list[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)] except exc.OperationalError as e: if getattr(e, 'msg', None) == 'no such table: visits': logger.warn('you may have to run indexer first!') @@ -182,7 +181,7 @@ def search_common(url: str, where: Where) -> VisitsResponse: logger.debug('got %d visits from db', len(visits)) - vlist: List[DbVisit] = [] + vlist: list[DbVisit] = [] for vis in visits: dt = vis.dt if dt.tzinfo is None: # FIXME need this for /visits endpoint as well? @@ -225,7 +224,7 @@ def status() -> Json: logger.exception(e) stats = {'ERROR': str(e)} - version: Optional[str] + version: str | None try: version = get_version() except Exception as e: @@ -299,7 +298,7 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse: return search_common( url='http://dummy.org', # NOTE: not used in the where query (below).. perhaps need to get rid of this - where=lambda table, url: between( + where=lambda table, url: between( # noqa: ARG005 func.strftime( '%s', # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00 # this is a bit fragile, relies on cachew internal timestamp format, e.g. 
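(The `db_stats` rewrite above, `[(total,)] = conn.execute(query)`, is worth a note: it unpacks the single `COUNT(*)` row directly and, unlike `list(...)[0][0]`, raises `ValueError` if the query somehow returns more or fewer than one row. A self-contained sketch with a throwaway in-memory table -- the table name is illustrative, not promnesia's actual schema:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE visits (url TEXT)')
    conn.execute("INSERT INTO visits VALUES ('https://example.com')")

    # destructure the one-and-only result row; a wrong row count fails loudly
    [(total,)] = conn.execute('SELECT COUNT(*) FROM visits')
    assert total == 1

So the shape of the result is asserted for free, rather than silently indexed into.)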
@@ -322,22 +321,23 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse: _NO_VERSION = (0, 11, 14) _LATEST = (9999, 9999, 9999) -def as_version(version: str) -> Tuple[int, int, int]: +def as_version(version: str) -> tuple[int, int, int]: if version == '': return _NO_VERSION try: [v1, v2, v3] = map(int, version.split('.')) - return (v1, v2, v3) except Exception as e: logger = get_logger() logger.error('error while parsing version %s', version) logger.exception(e) return _LATEST + else: + return (v1, v2, v3) @dataclass class VisitedRequest: - urls: List[str] + urls: List[str] # noqa: UP006 # pydantic doesn't like list[str] on 3.8 -- remove later client_version: str = '' VisitedResponse = List[Optional[Json]] @@ -355,7 +355,7 @@ def visited(request: VisitedRequest) -> VisitedResponse: version = as_version(client_version) nurls = [canonify(u) for u in urls] - snurls = list(sorted(set(nurls))) + snurls = sorted(set(nurls)) if len(snurls) == 0: return [] @@ -388,7 +388,7 @@ def visited(request: VisitedRequest) -> VisitedResponse: # brings down large queries to 50ms... with engine.connect() as conn: res = list(conn.execute(query)) - present: Dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res} + present: dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res} results = [] for nu in nurls: r = present.get(nu, None) diff --git a/src/promnesia/sources/auto.py b/src/promnesia/sources/auto.py index c3cb23ca..cd10e447 100644 --- a/src/promnesia/sources/auto.py +++ b/src/promnesia/sources/auto.py @@ -5,25 +5,23 @@ - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]] - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]] """ +from __future__ import annotations import csv from concurrent.futures import ProcessPoolExecutor as Pool from contextlib import nullcontext -from datetime import datetime import itertools import json import os -from typing import Optional, Iterable, Union, List, Tuple, NamedTuple, Sequence, Iterator, Iterable, Callable, Any, Dict, Set +from typing import Optional, Iterable, NamedTuple, Sequence, Iterator, Iterable, Callable, Any from fnmatch import fnmatch from pathlib import Path -from functools import lru_cache, wraps -import warnings +from functools import wraps -import pytz -from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger -from ..common import warn_once -from ..config import use_cores +from promnesia.common import Visit, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Result, Results, mime, traverse, file_mtime, echain, logger +from promnesia.common import warn_once +from promnesia.config import use_cores from .filetypes import EUrl, Ctx @@ -31,7 +29,7 @@ from .auto_logseq import logseq_replacer -def _collect(thing, path: List[str], result: List[EUrl]) -> None: +def _collect(thing, path: list[str], result: list[EUrl]) -> None: if isinstance(thing, str): ctx: Ctx = tuple(path) result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)]) @@ -51,9 +49,9 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None: # TODO mm. okay, I suppose could use kython consuming thingy?.. 
-def collect_from(thing) -> List[EUrl]: - uuu: List[EUrl] = [] - path: List[str] = [] +def collect_from(thing) -> list[EUrl]: + uuu: list[EUrl] = [] + path: list[str] = [] _collect(thing, path, uuu) return uuu @@ -85,7 +83,7 @@ def _plaintext(path: Path) -> Results: def fallback(ex): """Falls back to plaintext in case of issues""" - fallback_active: Dict[Any, bool] = {} + fallback_active: dict[Any, bool] = {} @wraps(ex) def wrapped(path: Path): nonlocal fallback_active @@ -169,7 +167,7 @@ def _org(path: Path) -> Results: def index( *paths: PathIsh, - ignored: Union[Sequence[str], str]=(), + ignored: Sequence[str] | str=(), follow: bool=True, replacer: Replacer=None, ) -> Results: @@ -210,10 +208,10 @@ class Options(NamedTuple): # TODO option to add ignores? not sure.. # TODO I don't like this replacer thing... think about removing it replacer: Replacer - root: Optional[Path]=None + root: Path | None=None -def _index_file_aux(path: Path, opts: Options) -> Union[Exception, List[Result]]: +def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]: # just a helper for the concurrent version (the generator isn't picklable) try: return list(_index_file(path, opts=opts)) @@ -248,7 +246,7 @@ def rit() -> Iterable[Path]: continue p = p.resolve() - if not os.path.exists(p): + if not os.path.exists(p): # noqa: PTH110 logger.debug('ignoring %s: broken symlink?', p) continue @@ -267,7 +265,7 @@ def rit() -> Iterable[Path]: Mime = str from .filetypes import Ex # meh -def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]: +def by_path(pp: Path) -> tuple[Ex | None, Mime | None]: suf = pp.suffix.lower() # first check suffixes, it's faster s = type2idx(suf) @@ -318,7 +316,7 @@ def _index_file(pp: Path, opts: Options) -> Results: logger.debug('indexing via %s: %s', ip.__name__, pp) - def indexer() -> Union[Urls, Results]: + def indexer() -> Urls | Results: # eh, annoying.. need to make more generic.. 
idx = ip(pp) try: @@ -353,7 +351,7 @@ def indexer() -> Union[Urls, Results]: v = v._replace(locator=loc) if replacer is not None and root is not None: - upd: Dict[str, Any] = {} + upd: dict[str, Any] = {} href = v.locator.href if href is not None: upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root))) diff --git a/src/promnesia/sources/auto_logseq.py b/src/promnesia/sources/auto_logseq.py index 3fcfbcb7..86ebf5e2 100644 --- a/src/promnesia/sources/auto_logseq.py +++ b/src/promnesia/sources/auto_logseq.py @@ -2,13 +2,13 @@ import urllib.parse def logseq_replacer(path: str, root: str) -> str: - if not path.startswith("editor://") or not (path.endswith('.md') or path.endswith('.org')): + if not path.startswith("editor://") or not (path.endswith((".md", ".org"))): return path - - graph = os.path.basename(root) - page_name = os.path.basename(path).rsplit('.', 1)[0] + + graph = os.path.basename(root) # noqa: PTH119 + page_name = os.path.basename(path).rsplit('.', 1)[0] # noqa: PTH119 encoded_page_name = urllib.parse.quote(page_name) - + uri = f"logseq://graph/{graph}?page={encoded_page_name}" return uri diff --git a/src/promnesia/sources/auto_obsidian.py b/src/promnesia/sources/auto_obsidian.py index c82b6f8c..844eec2f 100644 --- a/src/promnesia/sources/auto_obsidian.py +++ b/src/promnesia/sources/auto_obsidian.py @@ -1,8 +1,8 @@ def obsidian_replacer(p: str, r: str) -> str: if not p.startswith("editor://") or not p.endswith('.md'): return p - + path = p.split('/', 2)[-1] - + uri = f"obsidian://{path}" return uri diff --git a/src/promnesia/sources/browser.py b/src/promnesia/sources/browser.py index e1502e40..cb87e4cb 100644 --- a/src/promnesia/sources/browser.py +++ b/src/promnesia/sources/browser.py @@ -1,16 +1,17 @@ ''' Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers. ''' +from __future__ import annotations import re -from typing import Optional, Iterator, Any, TYPE_CHECKING +from typing import Iterator, Any, TYPE_CHECKING import warnings from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db -def index(p: Optional[PathIsh]=None) -> Results: - from . import hpi +def index(p: PathIsh | None = None) -> Results: + from . import hpi # noqa: F401 if p is None: from my.browser.all import history @@ -24,10 +25,11 @@ def index(p: Optional[PathIsh]=None) -> Results: ) try: yield from _index_new_with_adhoc_config(path=p) - return except Exception as e: logger.exception(e) warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.") + else: + return logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module") yield from _index_old(path=p) @@ -39,7 +41,7 @@ def _index_old(*, path: PathIsh) -> Results: def _index_new_with_adhoc_config(*, path: PathIsh) -> Results: - from . import hpi + from . 
import hpi # noqa: F401 ## previously, it was possible for index to be called with multiple different db search paths ## this would result in each subsequent call to my.browser.export.history invalidating the cache every time @@ -75,8 +77,8 @@ def export_path(cls) -> Paths: def _index_new(history: Iterator[BrowserMergeVisit]) -> Results: for v in history: - desc: Optional[str] = None - duration: Optional[Second] = None + desc: str | None = None + duration: Second | None = None metadata = v.metadata if metadata is not None: desc = metadata.title diff --git a/src/promnesia/sources/browser_legacy.py b/src/promnesia/sources/browser_legacy.py index 76f26cfb..f131d305 100644 --- a/src/promnesia/sources/browser_legacy.py +++ b/src/promnesia/sources/browser_legacy.py @@ -1,8 +1,9 @@ +from __future__ import annotations + from datetime import datetime from pathlib import Path from urllib.parse import unquote import sqlite3 -from typing import List, Set, Optional import pytz @@ -35,21 +36,21 @@ def index(p: PathIsh) -> Results: -def _index_dbs(dbs: List[Path], cachew_name: str): +def _index_dbs(dbs: list[Path], cachew_name: str): # TODO right... not ideal, need to think how to handle it properly... import sys sys.setrecursionlimit(5000) cache_dir = config.get().cache_dir cpath = None if cache_dir is None else cache_dir / cachew_name - emitted: Set = set() + emitted: set = set() yield from _index_dbs_aux(cpath, dbs, emitted=emitted) # todo wow, stack traces are ridiculous here... # todo hmm, feels like it should be a class or something? -@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger) -def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) -> Results: +@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger) # noqa: ARG005 +def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results: if len(dbs) == 0: return @@ -75,7 +76,7 @@ def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) -> yield from _index_db(db, emitted=emitted) -def _index_db(db: Path, emitted: Set): +def _index_db(db: Path, emitted: set): logger.info('processing %s', db) # debug level? # todo schema check (not so critical for cachew though) @@ -121,10 +122,10 @@ def _index_db(db: Path, emitted: Set): ColType = str -from typing import Any, NamedTuple, Tuple, Union, Sequence, Optional +from typing import NamedTuple, Tuple, Union, Sequence class Schema(NamedTuple): - cols: Sequence[Tuple[Col, ColType]] + cols: Sequence[tuple[Col, ColType]] key: Sequence[str] @@ -179,7 +180,7 @@ def row2visit(row: sqlite3.Row, loc: Loc) -> Visit: dt = chrome_time_to_utc(int(ts)) url = unquote(url) # chrome urls are all quoted dd = int(durs) - dur: Optional[Second] = None if dd == 0 else dd // 1_000_000 + dur: Second | None = None if dd == 0 else dd // 1_000_000 return Visit( url=url, dt=dt, diff --git a/src/promnesia/sources/demo.py b/src/promnesia/sources/demo.py index 60c9548b..85e973b5 100644 --- a/src/promnesia/sources/demo.py +++ b/src/promnesia/sources/demo.py @@ -2,11 +2,11 @@ A dummy source, used for testing Generates a sequence of fake evenly separated visits ''' +from __future__ import annotations from datetime import datetime, timedelta -from typing import Union -from ..common import Results, Visit, Loc +from promnesia.common import Results, Visit, Loc IsoFormatDt = str @@ -16,10 +16,10 @@ # TODO allow passing isoformat string as base_dt? # and maybe something similar as delta? 
start with seconds maybe def index( - count: int=100, - *, - base_dt: Union[datetime, IsoFormatDt] = datetime.min + timedelta(days=5000), - delta: Union[timedelta, Seconds] = timedelta(hours=1), + count: int = 100, + *, + base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000), + delta: timedelta | Seconds = timedelta(hours=1), ) -> Results: base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt) diff --git a/src/promnesia/sources/fbmessenger.py b/src/promnesia/sources/fbmessenger.py index 5e833d51..3a2c9634 100644 --- a/src/promnesia/sources/fbmessenger.py +++ b/src/promnesia/sources/fbmessenger.py @@ -2,11 +2,11 @@ Uses [[https://github.com/karlicoss/HPI][HPI]] for the messages data. ''' -from ..common import Results, Visit, Loc, extract_urls +from promnesia.common import Results, Visit, Loc, extract_urls def index() -> Results: - from . import hpi + from . import hpi # noqa: F401 from my.fbmessenger import messages for m in messages(): if isinstance(m, Exception): diff --git a/src/promnesia/sources/filetypes.py b/src/promnesia/sources/filetypes.py index 04f8cea8..d5679a29 100644 --- a/src/promnesia/sources/filetypes.py +++ b/src/promnesia/sources/filetypes.py @@ -1,7 +1,8 @@ -#!/usr/bin/env python3 +from __future__ import annotations + from functools import lru_cache from pathlib import Path -from typing import Dict, Callable, Optional, Sequence, NamedTuple, Union, Iterable +from typing import Callable, Sequence, NamedTuple, Union, Iterable from ..common import Results, Url @@ -18,13 +19,13 @@ class EUrl(NamedTuple): # keys are mime types + extensions Ex = Callable[[Path], Union[Results, Iterable[EUrl]]] # None means unhandled -TYPE2IDX: Dict[str, Optional[Ex]] = {} +TYPE2IDX: dict[str, Ex | None] = {} # NOTE: there are some types in auto.py at the moment... it's a bit messy # TYPE2IDX only contains the 'prefixes', to speed up the lookup we are using cache.. @lru_cache(None) -def type2idx(t: str) -> Optional[Ex]: +def type2idx(t: str) -> Ex | None: if len(t) == 0: return None # just in case? # first try exact match @@ -97,9 +98,9 @@ def type2idx(t: str) -> Optional[Ex]: video/ ''' -handle_later = lambda *args, **kwargs: () +handle_later = lambda *_args, **_kwargs: () -def ignore(*args, **kwargs): +def ignore(*_args, **_kwargs): # TODO log (once?) yield from () diff --git a/src/promnesia/sources/github.py b/src/promnesia/sources/github.py index 1ba21d94..e9f95a2d 100644 --- a/src/promnesia/sources/github.py +++ b/src/promnesia/sources/github.py @@ -1,16 +1,16 @@ ''' Uses [[https://github.com/karlicoss/HPI][HPI]] github module ''' +from __future__ import annotations # Note: requires the 'mistletoe' module if you enable render_markdown -from typing import Optional, Set -from ..common import Results, Visit, Loc, iter_urls, logger +from promnesia.common import Results, Visit, Loc, iter_urls, logger def index(*, render_markdown: bool = False) -> Results: - from . import hpi + from . 
import hpi # noqa: F401 from my.github.all import events if render_markdown: @@ -29,7 +29,7 @@ def index(*, render_markdown: bool = False) -> Results: continue # if enabled, convert the (markdown) body to HTML - context: Optional[str] = e.body + context: str | None = e.body if e.body is not None and render_markdown: context = TextParser(e.body)._doc_ashtml() # type: ignore[possibly-undefined] @@ -59,7 +59,7 @@ def index(*, render_markdown: bool = False) -> Results: # # Note: this set gets reset every event, is here to # prevent duplicates between URLExtract and the markdown parser - emitted: Set[str] = set() + emitted: set[str] = set() for url in iter_urls(e.body): if url in emitted: continue diff --git a/src/promnesia/sources/hackernews.py b/src/promnesia/sources/hackernews.py index de434058..f85d28dd 100644 --- a/src/promnesia/sources/hackernews.py +++ b/src/promnesia/sources/hackernews.py @@ -8,7 +8,7 @@ def index() -> Results: - from . import hpi + from . import hpi # noqa: F401 from my.hackernews import dogsheep for item in dogsheep.items(): diff --git a/src/promnesia/sources/hpi.py b/src/promnesia/sources/hpi.py index 7ce3c52e..6624aaac 100644 --- a/src/promnesia/sources/hpi.py +++ b/src/promnesia/sources/hpi.py @@ -2,10 +2,10 @@ Just a helper for a more humane error message when importing my.* dependencies ''' -from ..common import logger +from promnesia.common import logger try: - import my + import my # noqa: F401 except ImportError as e: logger.exception(e) logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')") diff --git a/src/promnesia/sources/html.py b/src/promnesia/sources/html.py index b3f58465..d68d40fc 100644 --- a/src/promnesia/sources/html.py +++ b/src/promnesia/sources/html.py @@ -1,6 +1,7 @@ ''' Extracts links from HTML files ''' +from __future__ import annotations from pathlib import Path from typing import Iterator, Tuple @@ -10,7 +11,7 @@ from bs4 import BeautifulSoup -# TODO present error summary in the very end; import errors -- makes sense to show +# TODO present error summary in the very end; import errors -- makes sense to show # TODO on some exceptions, request a fallback to text? diff --git a/src/promnesia/sources/hypothesis.py b/src/promnesia/sources/hypothesis.py index c03aa348..a88ed836 100644 --- a/src/promnesia/sources/hypothesis.py +++ b/src/promnesia/sources/hypothesis.py @@ -1,11 +1,11 @@ """ Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module """ -from ..common import Loc, Results, Visit, extract_urls, join_tags +from promnesia.common import Loc, Results, Visit, extract_urls, join_tags def index() -> Results: - from . import hpi + from . import hpi # noqa: F401 import my.hypothesis as hyp for h in hyp.highlights(): diff --git a/src/promnesia/sources/instapaper.py b/src/promnesia/sources/instapaper.py index 02bb2a99..ae0fa3d3 100644 --- a/src/promnesia/sources/instapaper.py +++ b/src/promnesia/sources/instapaper.py @@ -1,11 +1,11 @@ ''' Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module ''' -from ..common import Results, logger, Visit, Loc +from promnesia.common import Results, Visit, Loc def index() -> Results: - from . import hpi + from . 
diff --git a/src/promnesia/sources/html.py b/src/promnesia/sources/html.py
index b3f58465..d68d40fc 100644
--- a/src/promnesia/sources/html.py
+++ b/src/promnesia/sources/html.py
@@ -1,6 +1,7 @@
 '''
 Extracts links from HTML files
 '''
+from __future__ import annotations

 from pathlib import Path
 from typing import Iterator, Tuple
@@ -10,7 +11,7 @@
 from bs4 import BeautifulSoup

-# TODO present error summary in the very end; import errors -- makes sense to show
+# TODO present error summary in the very end; import errors -- makes sense to show
 # TODO on some exceptions, request a fallback to text?
diff --git a/src/promnesia/sources/hypothesis.py b/src/promnesia/sources/hypothesis.py
index c03aa348..a88ed836 100644
--- a/src/promnesia/sources/hypothesis.py
+++ b/src/promnesia/sources/hypothesis.py
@@ -1,11 +1,11 @@
 """
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module
 """

-from ..common import Loc, Results, Visit, extract_urls, join_tags
+from promnesia.common import Loc, Results, Visit, extract_urls, join_tags


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     import my.hypothesis as hyp

     for h in hyp.highlights():
diff --git a/src/promnesia/sources/instapaper.py b/src/promnesia/sources/instapaper.py
index 02bb2a99..ae0fa3d3 100644
--- a/src/promnesia/sources/instapaper.py
+++ b/src/promnesia/sources/instapaper.py
@@ -1,11 +1,11 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
 '''

-from ..common import Results, logger, Visit, Loc
+from promnesia.common import Results, Visit, Loc


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     import my.instapaper as ip

     for p in ip.pages():
diff --git a/src/promnesia/sources/markdown.py b/src/promnesia/sources/markdown.py
index 957be0bb..d471c84f 100644
--- a/src/promnesia/sources/markdown.py
+++ b/src/promnesia/sources/markdown.py
@@ -1,5 +1,7 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Iterator, NamedTuple, Optional
+from typing import Iterator, NamedTuple

 from ..common import Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger
@@ -18,7 +20,7 @@

 class Parsed(NamedTuple):
     url: Url
-    context: Optional[str]
+    context: str | None


 Result = Res[Parsed]
@@ -118,7 +120,7 @@ def _doc_ashtml(self):
             self._html = HTML_MARKER + _ashtml(self.doc)
         return self._html

-    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:
+    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:  # noqa: ARG002
         if not isinstance(cur, (AutoLink, Link)):
             return
diff --git a/src/promnesia/sources/org.py b/src/promnesia/sources/org.py
index c9205df6..8d74b9a6 100644
--- a/src/promnesia/sources/org.py
+++ b/src/promnesia/sources/org.py
@@ -1,10 +1,12 @@
+from __future__ import annotations
+
 from datetime import datetime
 import re
-from typing import Iterable, List, Set, Optional, Iterator, Tuple, NamedTuple, cast
+from typing import Iterable, Optional, Iterator, NamedTuple, cast
 from pathlib import Path

-from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
+from ..common import Visit, get_logger, Results, Url, Loc, iter_urls, PathIsh, Res, file_mtime

 import orgparse
@@ -36,7 +38,7 @@ def warn_old_orgparse_once() -> Iterable[Exception]:
     """

 class Parsed(NamedTuple):
-    dt: Optional[datetime]
+    dt: datetime | None
     heading: str
@@ -74,7 +76,7 @@ def _get_heading(n: OrgNode):
     return '' if n.is_root() else n.get_heading(format='raw')


-def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgNode]]]:
+def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
     try:
         parsed = _parse_node(node)
     except Exception as e:
@@ -98,7 +100,7 @@ def get_body_compat(node: OrgNode) -> str:
             # get_body was only added to root in 0.2.0
             for x in warn_old_orgparse_once():
                 # ugh. really crap, but it will at least only warn once... (because it caches)
-                raise x
+                raise x  # noqa: B904
             return UPDATE_ORGPARSE_WARNING
         else:
             raise e
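
Most hunks in markdown.py/org.py (and throughout this patch) ride on the same mechanism: `from __future__ import annotations` turns annotations into strings that are never evaluated at runtime, so PEP 604 unions (`str | None`) and PEP 585 builtin generics (`list[str]`, `dict[str, int]`) work even on Python 3.8 (the ruff target-version). A tiny illustration:

    from __future__ import annotations  # annotations become lazily-evaluated strings

    def first_line(s: str | None) -> str | None:  # fine on 3.8 with the future import
        return None if s is None else s.splitlines()[0]

    def histogram(xs: list[str]) -> dict[str, int]:
        return {x: xs.count(x) for x in xs}

Note this only covers annotation positions; runtime expressions such as the `Ex = Callable[[Path], Union[...]]` alias in filetypes.py still need typing.Union, which is why those spots keep it.
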
diff --git a/src/promnesia/sources/plaintext.py b/src/promnesia/sources/plaintext.py
index 99a4c21f..0e7b3595 100644
--- a/src/promnesia/sources/plaintext.py
+++ b/src/promnesia/sources/plaintext.py
@@ -1,9 +1,10 @@
-from ..common import get_logger, get_tmpdir, PathIsh, _is_windows
-from ..compat import removeprefix
+from __future__ import annotations
+
+from promnesia.common import get_logger, get_tmpdir, PathIsh, _is_windows
+from promnesia.compat import removeprefix

 from functools import lru_cache
 from pathlib import Path
-import os
 from typing import List

 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
@@ -15,7 +16,7 @@
 _URL_REGEX = removeprefix(_URL_REGEX, r'\b')

-@lru_cache()
+@lru_cache
 def _has_grep() -> bool:
     import shutil
     return shutil.which('grep') is not None
@@ -39,7 +40,7 @@ def _has_grep() -> bool:
 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
-def _grep(*, paths: List[str], recursive: bool) -> Command:
+def _grep(*, paths: list[str], recursive: bool) -> Command:
     return [
         'grep',
         *(['-r'] if recursive else []),
@@ -91,26 +92,26 @@ def extract_from_path(path: PathIsh) -> Command:
     logger = get_logger()

     if pp.is_dir():  # TODO handle archives here???
         return _extract_from_dir(str(pp))
-    else:
-        if any(pp.suffix == ex for ex in (
-            '.xz',
-            '.bz2',
-            '.gz',
-            '.zip',
-        )):
-            # todo should be debug?
-            # or should delete it completely, feels like unpacking archives here is a bit too much
-            raise RuntimeError(f"Archives aren't supported yet: {path}")
-            logger.info(f"Extracting from compressed file {path}")
-            import lzma
-            from tempfile import NamedTemporaryFile
-            # TODO hopefully, no collisions
-            import os.path
-            fname = os.path.join(tdir.name, os.path.basename(path))
-            with open(fname, 'wb') as fo:
-                with lzma.open(path, 'r') as cf:
-                    fo.write(cf.read())
-            return _extract_from_file(fname)
-        else:
-            r = _extract_from_file(str(pp))
-            return r
+
+    if any(pp.suffix == ex for ex in (
+        '.xz',
+        '.bz2',
+        '.gz',
+        '.zip',
+    )):
+        # todo should be debug?
+        # or should delete it completely, feels like unpacking archives here is a bit too much
+        raise RuntimeError(f"Archives aren't supported yet: {path}")
+        # logger.info(f"Extracting from compressed file {path}")
+        # import lzma
+        # from tempfile import NamedTemporaryFile
+        # # TODO hopefully, no collisions
+        # import os.path
+        # fname = os.path.join(tdir.name, os.path.basename(path))
+        # with open(fname, 'wb') as fo:
+        #     with lzma.open(path, 'r') as cf:
+        #         fo.write(cf.read())
+        # return _extract_from_file(fname)
+
+    r = _extract_from_file(str(pp))
+    return r
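
Context for the plaintext.py NOTE above: grep's exit status is 0 for "found matches", 1 for "no matches", and 2 or higher for actual errors, so a naive check=True (or unconditional check_returncode()) would blow up on a perfectly valid empty result. A minimal sketch of handling that correctly:

    import shutil
    import subprocess

    def grep_lines(pattern: str, path: str) -> list[str]:
        assert shutil.which('grep') is not None, 'requires grep on PATH'
        r = subprocess.run(
            ['grep', '-E', pattern, path],
            stdout=subprocess.PIPE, text=True,
            check=False,  # exit code 1 just means "nothing found"
        )
        if r.returncode >= 2:
            r.check_returncode()  # real error: bad pattern, unreadable file, ...
        return r.stdout.splitlines()
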
diff --git a/src/promnesia/sources/pocket.py b/src/promnesia/sources/pocket.py
index 416ce254..b027d67a 100644
--- a/src/promnesia/sources/pocket.py
+++ b/src/promnesia/sources/pocket.py
@@ -1,11 +1,11 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
 '''

-from ..common import Visit, Loc, Results
+from promnesia.common import Visit, Loc, Results


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     from my.pocket import articles

     # TODO use docstring from my. module? E.g. describing which pocket format is expected
diff --git a/src/promnesia/sources/reddit.py b/src/promnesia/sources/reddit.py
index 20be5ecc..96a3c2db 100644
--- a/src/promnesia/sources/reddit.py
+++ b/src/promnesia/sources/reddit.py
@@ -1,15 +1,20 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
 '''
+from __future__ import annotations

 from itertools import chain
-from typing import Set, Optional, Type

-from ..common import Visit, Loc, extract_urls, Results, logger
+from promnesia.common import Visit, Loc, extract_urls, Results, logger
+
+import typing
+
+if typing.TYPE_CHECKING:
+    from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase


-def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-    from . import hpi
+def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+    from . import hpi  # noqa: F401
     try:
         from my.reddit.all import submissions, comments, saved, upvoted
     except ModuleNotFoundError as e:
@@ -58,7 +63,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRende
 # mostly here so we can keep track of how the user
 # wants to render markdown
 class RedditRenderer:
-    def __init__(self, render_markdown: bool = False) -> None:
+    def __init__(self, *, render_markdown: bool = False) -> None:
         self._link_extractor = None
         self._parser_cls = None
         try:
@@ -77,7 +82,7 @@ def __init__(self, render_markdown: bool = False) -> None:

         self.render_markdown = render_markdown

-    def _from_comment(self, i: 'Comment') -> Results:
+    def _from_comment(self, i: Comment) -> Results:
         locator = Loc.make(
             title='Reddit comment',
             href=i.url,
@@ -85,7 +90,7 @@ def __init__(self, render_markdown: bool = False) -> None:
         yield from self._from_common(i, locator=locator)

-    def _from_submission(self, i: 'Submission') -> Results:
+    def _from_submission(self, i: Submission) -> Results:
         locator = Loc.make(
             title=f'Reddit submission: {i.title}',
             href=i.url,
@@ -93,7 +98,7 @@ def _from_submission(self, i: 'Submission') -> Results:
         yield from self._from_common(i, locator=locator)

-    def _from_upvote(self, i: 'Upvote') -> Results:
+    def _from_upvote(self, i: Upvote) -> Results:
         locator = Loc.make(
             title='Reddit upvote',
             href=i.url,
@@ -101,7 +106,7 @@ def _from_upvote(self, i: 'Upvote') -> Results:
         yield from self._from_common(i, locator=locator)

-    def _from_save(self, i: 'Save') -> Results:
+    def _from_save(self, i: Save) -> Results:
         locator = Loc.make(
             title='Reddit save',
             href=i.url,
@@ -117,7 +122,7 @@ def _render_body(self, text: str) -> str:
         return text

-    def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:
+    def _from_common(self, i: RedditBase, locator: Loc) -> Results:
         urls = [i.url]
         # TODO this should belong to HPI.. fix permalink handling I guess
         # ok, it's not present for all of them..
@@ -130,7 +135,7 @@ def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:

         context = self._render_body(i.text)

-        emitted: Set[str] = set()
+        emitted: set[str] = set()

         for url in chain(urls, extract_urls(i.text)):
             if url in emitted:
@@ -165,8 +170,3 @@ def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:
             )
             emitted.add(res.url)
-
-import typing
-if typing.TYPE_CHECKING:
-    from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
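
The reddit.py hunk also shows the idiomatic home for typing-only imports: moved from the bottom of the file to the top, under `if typing.TYPE_CHECKING:`, where type checkers see them but runtime never pays (or fails) for importing my.reddit. Combined with the future import, the annotations no longer need quoting. A stand-alone sketch of the shape, with a stdlib stand-in for the heavy dependency:

    from __future__ import annotations

    import typing

    if typing.TYPE_CHECKING:
        from decimal import Decimal  # stand-in for a heavy/optional dependency

    def fmt(x: Decimal | None) -> str:  # usable unquoted thanks to lazy annotations
        return 'n/a' if x is None else f'{x:.2f}'
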
""" +from __future__ import annotations from datetime import datetime import os import re from subprocess import run, PIPE -from typing import Union, Sequence +from typing import Sequence import warnings from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh from .plaintext import _has_grep -def index(command: Union[str, Sequence[PathIsh]]) -> Results: +def index(command: str | Sequence[PathIsh]) -> Results: cmd: Sequence[PathIsh] cmds: str if isinstance(command, str): @@ -71,7 +72,7 @@ def handle_line(line: str) -> Results: context=context, ) - r = run(cmd, stdout=PIPE) + r = run(cmd, stdout=PIPE, check=False) if r.returncode > 0: if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1): # ugh. grep returns 1 on no matches... r.check_returncode() diff --git a/src/promnesia/sources/signal.py b/src/promnesia/sources/signal.py index 6fdd22c5..aa4a4e55 100644 --- a/src/promnesia/sources/signal.py +++ b/src/promnesia/sources/signal.py @@ -1,6 +1,7 @@ """ Collects visits from Signal Desktop's encrypted SQLIite db(s). """ +from __future__ import annotations # Functions get their defaults from module-data. # @@ -17,7 +18,7 @@ from contextlib import contextmanager from pathlib import Path from textwrap import dedent, indent -from typing import Any, Iterable, Iterator, Mapping, Union, Optional +from typing import Any, Iterable, Iterator, Mapping, Union from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch @@ -29,7 +30,7 @@ def index( http_only: bool = False, locator_schema: str="editor", append_platform_path: bool = False, - override_key: Optional[str] = None, + override_key: str | None = None, ) -> Results: """ :param db_paths: @@ -109,10 +110,10 @@ def index( id, type, coalesce( - profileFullName, - profileName, + profileFullName, + profileName, name, - profileFamilyName, + profileFamilyName, e164 ) as aname, name, @@ -237,11 +238,11 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path] platform_name = platform.system() try: plat_paths = platform_db_paths[platform_name] - except LookupError: + except LookupError as le: raise ValueError( f"Unknown platform({platform_name}!" f"\n Expected one of {list(platform_db_paths.keys())}." 
-            )
+            ) from le

     if db_paths and append:
         db_paths = [  # type: ignore[assignment]
@@ -261,7 +262,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:


 def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
-    with open(signal_desktop_config_path, "r") as conf:
+    with Path(signal_desktop_config_path).open() as conf:
         return json.load(conf)["key"]
@@ -269,6 +270,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
 def connect_db(
     db_path: Path,
     key,
+    *,
     decrypt_db: bool = False,
     sqlcipher_exe: PathIsh = "sqlcipher",
     **decryption_pragmas: Mapping[str, Any],
@@ -333,7 +335,7 @@ def connect_db(
                 check=True,
                 input=sql,
                 capture_output=True,
-                universal_newlines=True,
+                text=True,
             )
         except sbp.CalledProcessError as ex:
             prefix = " " * 4
@@ -380,7 +382,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
     if not urls:
         return

-    assert (
+    assert (  # noqa: PT018
         text and mid and sender and chatname
     ), f"should have eliminated messages without 'http' or missing ids: {row}"
@@ -400,7 +402,7 @@ def _harvest_db(
     db_path: Path,
     messages_query: str,
     *,
-    override_key: Optional[str] = None,
+    override_key: str | None = None,
     locator_schema: str = "editor",
     decrypt_db: bool = False,
     **decryption_pragmas,
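
The `raise ... from le` fix in signal.py deserves spelling out: explicit chaining records the original LookupError as __cause__ ("The above exception was the direct cause..."), instead of the misleading implicit "During handling of the above exception, another exception occurred". A self-contained version of that hunk's shape (with a trimmed stand-in mapping):

    platform_db_paths = {'Linux': '~/.config/Signal'}  # stand-in, not the real table

    def db_dir_for(platform_name: str) -> str:
        try:
            return platform_db_paths[platform_name]
        except LookupError as le:
            raise ValueError(
                f"Unknown platform({platform_name})!"
                f"\n  Expected one of {list(platform_db_paths)}."
            ) from le
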
diff --git a/src/promnesia/sources/smscalls.py b/src/promnesia/sources/smscalls.py
index 8097bd63..e19df8b7 100644
--- a/src/promnesia/sources/smscalls.py
+++ b/src/promnesia/sources/smscalls.py
@@ -6,7 +6,7 @@

 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     from my.smscalls import messages

     for m in messages():
diff --git a/src/promnesia/sources/stackexchange.py b/src/promnesia/sources/stackexchange.py
index 06071834..1dc8f9e2 100644
--- a/src/promnesia/sources/stackexchange.py
+++ b/src/promnesia/sources/stackexchange.py
@@ -2,11 +2,11 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''

-from ..common import Results, Visit, Loc
+from promnesia.common import Results, Visit, Loc


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     import my.stackexchange.gdpr as G
     for v in G.votes():
         if isinstance(v, Exception):
diff --git a/src/promnesia/sources/takeout.py b/src/promnesia/sources/takeout.py
index 8beb5a16..cfa1191a 100644
--- a/src/promnesia/sources/takeout.py
+++ b/src/promnesia/sources/takeout.py
@@ -1,11 +1,14 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
 '''
-from typing import Iterable, Set, Any, NamedTuple
+from __future__ import annotations
+
+import json
+from typing import Iterable, Any, NamedTuple
 import warnings

-from ..common import Visit, Loc, Results, logger
-from ..compat import removeprefix
+from promnesia.common import Visit, Loc, Results, logger
+from promnesia.compat import removeprefix


 # in case user is using an old version of google_takeout_parser
@@ -14,8 +17,7 @@ class YoutubeCSVStub(NamedTuple):

 def index() -> Results:
-    from . import hpi
-    import json
+    from . import hpi  # noqa: F401

     try:
         from my.google.takeout.parser import events
@@ -32,7 +34,7 @@ def index() -> Results:
         return

-    _seen: Set[str] = {
+    _seen: set[str] = {
         # these are definitely not useful for promnesia
         'Location',
         'PlaceVisit',
@@ -54,7 +56,7 @@ def warn_once_if_not_seen(e: Any) -> Iterable[Exception]:
         if et_name in _seen:
             return
         _seen.add(et_name)
-        yield RuntimeError(f"Unhandled event {repr(type(e))}: {e}")
+        yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")

     for e in events():
         if isinstance(e, Exception):
diff --git a/src/promnesia/sources/takeout_legacy.py b/src/promnesia/sources/takeout_legacy.py
index 675397a6..60934c32 100644
--- a/src/promnesia/sources/takeout_legacy.py
+++ b/src/promnesia/sources/takeout_legacy.py
@@ -1,8 +1,10 @@
-from ..common import Visit, logger, PathIsh, Url, Loc, Results
+from __future__ import annotations
+
+from promnesia.common import Visit, logger, Loc, Results


 # TODO make an iterator, insert in db as we go? handle errors gracefully?
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     from my.google.takeout.paths import get_takeouts
     takeouts = list(get_takeouts())
     # TODO if no takeouts, raise?
@@ -25,7 +27,7 @@ def index() -> Results:
 import pytz
 from itertools import chain
 from datetime import datetime
-from typing import List, Optional, Iterable, TYPE_CHECKING
+from typing import Iterable
 from pathlib import Path
 import json
@@ -61,7 +63,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
-    for dt, url, title in read_html(takeout, spath):
+    for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
             dt=dt,
diff --git a/src/promnesia/sources/telegram.py b/src/promnesia/sources/telegram.py
index 9f0c7850..eddb3d61 100644
--- a/src/promnesia/sources/telegram.py
+++ b/src/promnesia/sources/telegram.py
@@ -1,11 +1,12 @@
-from typing import Optional
+from __future__ import annotations
+
 from urllib.parse import unquote  # TODO mm, make it easier to remember to use...
 import warnings

 from promnesia.common import Results, logger, extract_urls, Visit, Loc, PathIsh


-def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
+def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,10 +18,11 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
     )
     try:
         yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+    else:
+        return

     logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
     yield from _index_legacy(database=database, http_only=http_only)
@@ -32,7 +34,7 @@ def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:


 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401

     class config:
         class telegram:
@@ -45,14 +47,14 @@ class telegram_backup:


 def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     from my.telegram.telegram_backup import messages

     extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
-    for i, m in enumerate(messages(
-        with_extra_media_info=with_extra_media_info,
-        extra_where=extra_where,
-    )):
+    for m in messages(
+        with_extra_media_info=with_extra_media_info,
+        extra_where=extra_where,
+    ):
         text = m.text

         urls = extract_urls(text)
diff --git a/src/promnesia/sources/telegram_legacy.py b/src/promnesia/sources/telegram_legacy.py
index d9846541..bda0fc35 100644
--- a/src/promnesia/sources/telegram_legacy.py
+++ b/src/promnesia/sources/telegram_legacy.py
@@ -1,11 +1,12 @@
 '''
 Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
 '''
+from __future__ import annotations

 from pathlib import Path
 import sqlite3
 from textwrap import dedent
-from typing import Union, TypeVar
+from typing import TypeVar
 from urllib.parse import unquote  # TODO mm, make it easier to remember to use...

 from ..common import PathIsh, Visit, get_logger, Loc, extract_urls, from_epoch, Results, echain
@@ -14,11 +15,10 @@
 T = TypeVar("T")


-def unwrap(res: Union[T, Exception]) -> T:
+def unwrap(res: T | Exception) -> T:
     if isinstance(res, Exception):
         raise res
-    else:
-        return res
+    return res


 def index(database: PathIsh, *, http_only: bool=False) -> Results:
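
telegram_legacy's unwrap() is the standard companion to the Res[T] ("value or exception") convention used across promnesia sources: errors are carried as data and only raised at the point of use. A generic sketch:

    from typing import TypeVar, Union

    T = TypeVar('T')

    def unwrap(res: Union[T, Exception]) -> T:
        if isinstance(res, Exception):
            raise res  # surface the stored error lazily
        return res

    # e.g. visits = [unwrap(r) for r in results] raises on the first stored error
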
diff --git a/src/promnesia/sources/twitter.py b/src/promnesia/sources/twitter.py
index 1fce3c10..fc8590f4 100644
--- a/src/promnesia/sources/twitter.py
+++ b/src/promnesia/sources/twitter.py
@@ -3,11 +3,11 @@
 '''
 from typing import Iterable

-from ..common import logger, Results, Visit, Loc, extract_urls, Res
+from promnesia.common import logger, Results, Visit, Loc, extract_urls, Res


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     import my.twitter.all as tw
     # TODO hmm. tweets themselves are sort of visits? not sure if they should contribute..
     processed = 0
diff --git a/src/promnesia/sources/vcs.py b/src/promnesia/sources/vcs.py
index 8bf4ab8c..b726e358 100644
--- a/src/promnesia/sources/vcs.py
+++ b/src/promnesia/sources/vcs.py
@@ -1,6 +1,7 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
+from __future__ import annotations
 # TODO not sure if worth exposing... could be just handled by auto or something?)

 from pathlib import Path
diff --git a/src/promnesia/sources/viber.py b/src/promnesia/sources/viber.py
index 23dfeca5..be33fc62 100644
--- a/src/promnesia/sources/viber.py
+++ b/src/promnesia/sources/viber.py
@@ -1,13 +1,14 @@
 """
 Collects visits from Viber desktop app (e.g. `~/.ViberPC/XYZ123/viber.db`)
 """
+from __future__ import annotations

 import logging
 import textwrap
 from os import PathLike
 from pathlib import Path
 import sqlite3
-from typing import Iterable, Optional
+from typing import Iterable

 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch, join_tags
 from ..sqlite import sqlite_connection
@@ -34,12 +35,12 @@ def index(

     msgs_query = messages_query(http_only)

-    for db_path in _get_files(db_path):
-        assert db_path.is_file(), f"Is it a (Viber-desktop sqlite) file? {db_path}"
-        yield from _harvest_db(db_path, msgs_query, locator_schema)
+    for db in _get_files(db_path):
+        assert db.is_file(), f"Is it a (Viber-desktop sqlite) file? {db}"
+        yield from _harvest_db(db, msgs_query, locator_schema)


-def messages_query(http_only: Optional[bool]) -> str:
+def messages_query(http_only: bool | None) -> str:
     """
     An SQL-query returning 1 row for each message
@@ -123,7 +124,7 @@ def _handle_row(row: sqlite3.Row, db_path: PathLike, locator_schema: str) -> Res
     tags: str = row["tags"]
     url_title: str = row["url_title"]

-    assert (
+    assert (  # noqa: PT018
         text and mid and sender and chatname
     ), f"sql-query should eliminate messages without 'http' or missing ids: {row}"
@@ -154,7 +155,7 @@ def _get_files(path: PathIsh) -> Iterable[Path]:
     """
     path = Path(path).expanduser()
     parts = path.parts[1:] if path.is_absolute() else path.parts
-    return Path(path.root).glob(str(Path("").joinpath(*parts)))
+    return Path(path.root).glob(str(Path("").joinpath(*parts)))  # noqa: PTH201


 def _harvest_db(db_path: PathIsh, msgs_query: str, locator_schema: str) -> Results:
diff --git a/src/promnesia/sources/website.py b/src/promnesia/sources/website.py
index e705644b..86293b88 100644
--- a/src/promnesia/sources/website.py
+++ b/src/promnesia/sources/website.py
@@ -30,7 +30,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     ]
     # TODO follow sitemap? e.g. gwern
     logger.info(' '.join(cmd))
-    res = run(cmd)
+    res = run(cmd, check=False)

     if res.returncode == 8:
         # man wget: 8 means server error (e.g. broken link)
diff --git a/src/promnesia/sources/zulip.py b/src/promnesia/sources/zulip.py
index 762c7982..49d5bd44 100644
--- a/src/promnesia/sources/zulip.py
+++ b/src/promnesia/sources/zulip.py
@@ -2,11 +2,11 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Zulip data.
 '''

-from ..common import Results, Visit, Loc, iter_urls
+from promnesia.common import Results, Visit, Loc, iter_urls


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
     import my.zulip.organization as Z
     for m in Z.messages():
         if isinstance(m, Exception):
diff --git a/src/promnesia/sqlite.py b/src/promnesia/sqlite.py
index 387b4aa9..41ccbdda 100644
--- a/src/promnesia/sqlite.py
+++ b/src/promnesia/sqlite.py
@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 from contextlib import contextmanager
 import sqlite3
-from typing import Callable, Optional, Any, Iterator, Union, Literal
+from typing import Callable, Any, Iterator, Union, Literal

 from .common import PathIsh
@@ -10,13 +12,13 @@

 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return {key: value for key, value in zip(fields, row)}
+    return dict(zip(fields, row))


 Factory = Union[SqliteRowFactory, Literal['row', 'dict']]

 @contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Optional[Factory]=None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Factory | None=None) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
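
For sqlite.py: the helper's two tricks are sqlite's URI mode (so a read-only indexer can pass immutable=1 and safely open a database owned by a live app) and a row factory that yields dicts instead of tuples. A condensed sketch of both, independent of promnesia's wrapper:

    import sqlite3
    from contextlib import closing

    def dump_schema(db: str) -> list[dict]:
        # uri=True enables the file: URI syntax, which is the only way to pass immutable=1
        with closing(sqlite3.connect(f'file:{db}?immutable=1', uri=True)) as conn:
            conn.row_factory = lambda cur, row: dict(zip([c[0] for c in cur.description], row))
            return list(conn.execute('SELECT * FROM sqlite_master'))
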
diff --git a/src/promnesia/tests/common.py b/src/promnesia/tests/common.py
index 6c3ec327..19b69e91 100644
--- a/src/promnesia/tests/common.py
+++ b/src/promnesia/tests/common.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from contextlib import closing, contextmanager
 import gc
 import inspect
@@ -25,7 +27,7 @@ def throw(x: Exception) -> NoReturn:

 @pytest.fixture
-def gc_control(gc_on: bool):
+def gc_control(*, gc_on: bool):
     if gc_on:
         # no need to do anything, should be on by default
         yield
diff --git a/src/promnesia/tests/server_helper.py b/src/promnesia/tests/server_helper.py
index 192b40aa..58639c08 100644
--- a/src/promnesia/tests/server_helper.py
+++ b/src/promnesia/tests/server_helper.py
@@ -1,9 +1,11 @@
+from __future__ import annotations
+
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
 import sys
 import time
-from typing import Any, Dict, Iterator, Optional
+from typing import Any, Iterator

 import psutil
 import requests
@@ -18,18 +20,18 @@ class Helper:
     port: str
     process: psutil.Popen

-    def get(self, path: str, *args):
+    def get(self, path: str):
         # check it's alive first so the error is cleaner
         assert self.process.poll() is None, self.process
         return requests.get(f'http://{self.host}:{self.port}' + path)

-    def post(self, path: str, *, json: Optional[Dict[str, Any]] = None):
+    def post(self, path: str, *, json: dict[str, Any] | None = None):
         assert self.process.poll() is None, self.process
         return requests.post(f'http://{self.host}:{self.port}' + path, json=json)


@contextmanager
-def run_server(db: Optional[PathIsh] = None, *, timezone: Optional[str] = None) -> Iterator[Helper]:
+def run_server(db: PathIsh | None = None, *, timezone: str | None = None) -> Iterator[Helper]:
     # TODO not sure, perhaps best to use a thread or something?
     # but for some tests makes more sense to test in a separate process
     with free_port() as pp:
@@ -56,7 +58,7 @@ def run_server(db: Optional[PathIsh] = None, *, timezone: Optional[str] = None)
             time.sleep(0.1)
     else:
         raise RuntimeError("Couldn't connect to '{st}' after 50 attempts")
-    print("Started server up, db: {db}".format(db=db), file=sys.stderr)
+    print(f"Started server up, db: {db}", file=sys.stderr)

     yield server
diff --git a/src/promnesia/tests/sources/test_auto.py b/src/promnesia/tests/sources/test_auto.py
index b1e72ea2..7cf9c9da 100644
--- a/src/promnesia/tests/sources/test_auto.py
+++ b/src/promnesia/tests/sources/test_auto.py
@@ -19,7 +19,7 @@ def makemap(visits):
     def it():
         vit = (throw(v) if isinstance(v, Exception) else v for v in visits)
         for k, g in groupby(sorted(vit, key=key), key=key):
-            yield k, list(sorted(g))
+            yield k, sorted(g)
     return dict(it())
diff --git a/src/promnesia/tests/sources/test_org.py b/src/promnesia/tests/sources/test_org.py
index 74480a7c..d7dcec50 100644
--- a/src/promnesia/tests/sources/test_org.py
+++ b/src/promnesia/tests/sources/test_org.py
@@ -1,4 +1,5 @@
-from typing import Optional
+from __future__ import annotations
+
 from ...common import Visit
 from ...sources.org import extract_from_file
@@ -6,7 +7,7 @@
 from ..common import get_testdata, throw

-def delrf(s: Optional[str]) -> Optional[str]:
+def delrf(s: str | None) -> str | None:
     if s is None:
         return None
     # meh.. not sure how to handle this properly, ideally should be via pytest?
diff --git a/src/promnesia/tests/test_cannon.py b/src/promnesia/tests/test_cannon.py
index 6921caa8..06d124e6 100644
--- a/src/promnesia/tests/test_cannon.py
+++ b/src/promnesia/tests/test_cannon.py
@@ -134,7 +134,7 @@ def test_reddit(url, expected):
 def test_pocket(url, expected):
     assert canonify(url) == expected

-@pytest.mark.parametrize("url,expected", [
+@pytest.mark.parametrize(("url", "expected"), [
     # TODO ??
     'https://groups.google.com/a/list.hypothes.is/forum/#!topic/dev/kcmS7H8ssis',
     #
     # TODO FIXME fragment handling
@@ -295,7 +295,7 @@ def test(url, expected):
     },
 ])
 def test_same_norm(urls):
-    urls = list(sorted(urls))
+    urls = sorted(urls)
     u0 = urls[0]
     c0 = canonify(u0)
     for u in urls[1:]:
@@ -308,7 +308,7 @@ def test_error():
     # borrowed from https://bugs.mageia.org/show_bug.cgi?id=24640#c7
     canonify('https://example.com\uFF03@bing.com')

-@pytest.mark.parametrize("url,expected", [
+@pytest.mark.parametrize(("url", "expected"), [
     ('https://news.ycombinator.com/item?id=', 'news.ycombinator.com/item?id='),
     ('https://www.youtube.com/watch?v=hvoQiF0kBI8&list&index=2', 'youtube.com/watch?v=hvoQiF0kBI8&list='),
@@ -316,7 +316,7 @@ def test_error():
 def test_empty_query_parameter(url, expected):
     assert canonify(url) == expected

-@pytest.mark.parametrize("url,expected", [
+@pytest.mark.parametrize(("url", "expected"), [
     ('http://www.isfdb.org/cgi-bin/title.cgi?2172', 'isfdb.org/cgi-bin/title.cgi?2172='),
     ('http://www.isfdb.org/cgi-bin/title.cgi?2172+1', 'isfdb.org/cgi-bin/title.cgi?2172%201='),
     ('http://www.isfdb.org/cgi-bin/title.cgi?2172&foo=bar&baz&quux', 'isfdb.org/cgi-bin/title.cgi?2172=&baz=&foo=bar&quux='),
diff --git a/src/promnesia/tests/test_cli.py b/src/promnesia/tests/test_cli.py
index bf9bbabd..be99dec0 100644
--- a/src/promnesia/tests/test_cli.py
+++ b/src/promnesia/tests/test_cli.py
@@ -22,12 +22,12 @@ def test_demo() -> None:
     # TODO why does it want post??
     time.sleep(2)
     # meh.. need a generic helper to wait till ready...
     res = {}
-    for attempt in range(30):
+    for _attempt in range(30):
         time.sleep(1)
         try:
             res = requests.post(
                 "http://localhost:16789/search",
-                json=dict(url="https://github.com/kaushalmodi/ox-hugo/issues"),
+                json={'url': "https://github.com/kaushalmodi/ox-hugo/issues"},
             ).json()
             break
         except:
diff --git a/src/promnesia/tests/test_config.py b/src/promnesia/tests/test_config.py
index d6879ca9..1e36eb80 100644
--- a/src/promnesia/tests/test_config.py
+++ b/src/promnesia/tests/test_config.py
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 from contextlib import contextmanager
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Union, List

 from ..common import Source
 from ..config import import_config, Config
@@ -22,7 +23,7 @@ def make(body: str) -> Config:

 @contextmanager
-def with_config(cfg: Union[str, Config]):
+def with_config(cfg: str | Config):
     from .. import config as C
     assert not C.has()
@@ -35,7 +36,7 @@ def with_config(cfg: str | Config):
     C.reset()

-def index(cfg: Union[str, Config], check=True) -> List[Exception]:
+def index(cfg: str | Config, *, check: bool = True) -> list[Exception]:
     from ..__main__ import _do_index
     with with_config(cfg):
diff --git a/src/promnesia/tests/test_db_dump.py b/src/promnesia/tests/test_db_dump.py
index 8128257d..77c23502 100644
--- a/src/promnesia/tests/test_db_dump.py
+++ b/src/promnesia/tests/test_db_dump.py
@@ -4,7 +4,7 @@
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any, Iterable
+from typing import Any

 from hypothesis import settings, given
@@ -20,13 +20,14 @@
 from ..database.load import get_all_db_visits
 from ..sqlite import sqlite_connection

-from .common import gc_control, running_on_ci
+from .common import running_on_ci
+from .common import gc_control  # noqa: F401


-HSETTINGS: dict[str, Any] = dict(
-    derandomize=True,
-    deadline=timedelta(seconds=2),  # sometimes slow on ci
-)
+HSETTINGS: dict[str, Any] = {
+    'derandomize': True,
+    'deadline': timedelta(seconds=2),  # sometimes slow on ci
+}


 def test_no_visits(tmp_path: Path) -> None:
diff --git a/src/promnesia/tests/test_extract.py b/src/promnesia/tests/test_extract.py
index d140f7af..5e8b6d29 100644
--- a/src/promnesia/tests/test_extract.py
+++ b/src/promnesia/tests/test_extract.py
@@ -3,7 +3,8 @@
 from ..common import Visit, DbVisit, Loc, Source
 from ..extract import extract_visits
-from .common import get_testdata, unwrap, running_on_ci, gc_control
+from .common import get_testdata, unwrap, running_on_ci
+from .common import gc_control  # noqa: F401

 from more_itertools import ilen
 import pytest
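
The split `from .common import gc_control  # noqa: F401` lines in the test modules deserve a word: pytest discovers fixtures by *name* in the test module (or a conftest), so the import is load-bearing even though nothing references it directly -- exactly the situation F401 flags. A sketch of the pattern with hypothetical module names:

    # fixtures.py (hypothetical shared module)
    import pytest

    @pytest.fixture
    def server_url() -> str:
        return 'http://localhost:13131'

    # test_something.py (hypothetical test module):
    #     from fixtures import server_url  # noqa: F401 -- pytest resolves it by name
    #
    #     def test_url(server_url) -> None:
    #         assert server_url.startswith('http')
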
diff --git a/src/promnesia/tests/test_indexer.py b/src/promnesia/tests/test_indexer.py
index c9c8a4c2..dd1cd9bf 100644
--- a/src/promnesia/tests/test_indexer.py
+++ b/src/promnesia/tests/test_indexer.py
@@ -8,7 +8,8 @@

 import pytest

-from .common import get_testdata, promnesia_bin, reset_filters, write_config
+from .common import get_testdata, promnesia_bin, write_config
+from .common import reset_filters  # noqa: F401
@@ -204,7 +205,7 @@ def HOOK(visit: Res[DbVisit]) -> Iterator[Res[DbVisit]]:
         if 'page1' in nurl:
             yield visit._replace(norm_url='patched.com')
         elif 'page2' in nurl:
-            raise Exception('boom')  # deliberately crash
+            raise RuntimeError('boom')  # deliberately crash
         elif 'page3' in nurl:
             # just don't yield anything! it will be omitted
             pass
@@ -235,7 +236,7 @@ def test_example_config(tmp_path: Path) -> None:
     if _is_windows:
         pytest.skip("doesn't work on Windows: example config references /usr/include paths")

-    config = read_example_config() + '\n' + f'OUTPUT_DIR = "{str(tmp_path)}"'
+    config = read_example_config() + '\n' + f'OUTPUT_DIR = "{tmp_path!s}"'
     cfg_path = tmp_path / 'example_config.py'
     cfg_path.write_text(config)