From 3355e6c7c790835d95a4c646265e28375f5ad52b Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Wed, 22 Nov 2023 00:32:58 +0000 Subject: [PATCH] tests: move more tests inside packages; clean up --- src/promnesia/tests/common.py | 48 +++++++++++ src/promnesia/tests/sources/__init__.py | 0 .../promnesia/tests/sources/test_auto.py | 19 ++--- src/promnesia/tests/sources/test_filetypes.py | 42 ++++++++++ .../promnesia/tests/sources/test_org.py | 19 +++-- .../promnesia/tests/test_cannon.py | 4 +- .../cli.py => src/promnesia/tests/test_cli.py | 29 +++---- src/promnesia/tests/test_extract_urls.py | 43 ++++++++++ .../promnesia/tests}/test_traverse.py | 24 +++--- tests/test_misc.py | 84 ------------------- 10 files changed, 176 insertions(+), 136 deletions(-) create mode 100644 src/promnesia/tests/sources/__init__.py rename tests/test_auto_indexer.py => src/promnesia/tests/sources/test_auto.py (79%) create mode 100644 src/promnesia/tests/sources/test_filetypes.py rename tests/test_org_indexer.py => src/promnesia/tests/sources/test_org.py (82%) rename tests/cannon.py => src/promnesia/tests/test_cannon.py (99%) rename tests/cli.py => src/promnesia/tests/test_cli.py (58%) create mode 100644 src/promnesia/tests/test_extract_urls.py rename {tests => src/promnesia/tests}/test_traverse.py (77%) delete mode 100644 tests/test_misc.py diff --git a/src/promnesia/tests/common.py b/src/promnesia/tests/common.py index fe2f86e1..0bf6c928 100644 --- a/src/promnesia/tests/common.py +++ b/src/promnesia/tests/common.py @@ -1,9 +1,18 @@ +from contextlib import contextmanager import gc import os +from pathlib import Path +import sys from typing import NoReturn import pytest +from ..common import _is_windows + + +def under_ci() -> bool: + return 'CI' in os.environ + def throw(x: Exception) -> NoReturn: ''' @@ -27,3 +36,42 @@ def gc_control(gc_on: bool): running_on_ci = 'CI' in os.environ + + +GIT_ROOT = Path(__file__).absolute().parent.parent.parent.parent +TESTDATA = GIT_ROOT / 'tests/testdata' + + +def get_testdata(path: str) -> Path: + assert TESTDATA.is_dir() + res = TESTDATA / path + if not res.exists(): + raise RuntimeError(f"'{res}' not found! You propably need to run 'git submodule update --init --recursive'") + return TESTDATA / path + + +@contextmanager +def tmp_popen(*args, **kwargs): + import psutil # type: ignore + with psutil.Popen(*args, **kwargs) as p: + try: + yield p + finally: + for c in p.children(recursive=True): + c.kill() + p.kill() + p.wait() + +# meh +def promnesia_bin(*args): + # not sure it's a good idea to diverge, but not sure if there's a better way either? + # ugh. on windows there is no bash so can't use the script + # whatever... + if under_ci() or _is_windows: + # should be able to use the installed version + return [sys.executable, '-m', 'promnesia', *args] + else: + # use version from the repository + root = Path(__file__).parent.parent + pm = root / 'scripts/promnesia' + return [pm, *args] diff --git a/src/promnesia/tests/sources/__init__.py b/src/promnesia/tests/sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_auto_indexer.py b/src/promnesia/tests/sources/test_auto.py similarity index 79% rename from tests/test_auto_indexer.py rename to src/promnesia/tests/sources/test_auto.py index 6bb7108e..b1e72ea2 100644 --- a/tests/test_auto_indexer.py +++ b/src/promnesia/tests/sources/test_auto.py @@ -1,9 +1,9 @@ from itertools import groupby import os -from promnesia.sources import auto +from ...sources import auto -from common import tdata, throw +from ..common import get_testdata, throw sa2464 = 'https://www.scottaaronson.com/blog/?p=2464' @@ -15,18 +15,17 @@ def makemap(visits): key = lambda v: v.url + def it(): vit = (throw(v) if isinstance(v, Exception) else v for v in visits) for k, g in groupby(sorted(vit, key=key), key=key): yield k, list(sorted(g)) + return dict(it()) def test_json() -> None: - mm = makemap(auto.index( - tdata('auto'), - ignored='*/orgs/*', - )) + mm = makemap(auto.index(get_testdata('auto'), ignored='*/orgs/*')) assert mm.keys() == _JSON_URLS # TODO not sure if they deserve separate visits.. @@ -39,7 +38,7 @@ def test_json() -> None: def test_auto() -> None: - mm = makemap(auto.index(tdata('auto'))) + mm = makemap(auto.index(get_testdata('auto'))) org_link = 'https://www.youtube.com/watch?v=rHIkrotSwcc' assert { *_JSON_URLS, @@ -47,21 +46,21 @@ def test_auto() -> None: }.issubset(mm.keys()) [v] = mm[org_link] - assert v.locator.title == 'orgs' + os.sep + 'file.org:14' # meh + assert v.locator.title == 'orgs' + os.sep + 'file.org:14' # meh assert v.locator.href.endswith('file.org:14') assert "xxx /r/cpp" in v.context assert "I've enjoyed [Chandler Carruth's" in v.context def test_obsidian() -> None: - mm = makemap(auto.index(tdata('obsidian-vault'))) + mm = makemap(auto.index(get_testdata('obsidian-vault'))) example_url = 'https://example.com' [v] = mm[example_url] assert v.locator.href.startswith('obsidian://') def test_logseq() -> None: - mm = makemap(auto.index(tdata('logseq-graph'))) + mm = makemap(auto.index(get_testdata('logseq-graph'))) example_url = 'https://example.com' [v] = mm[example_url] assert v.locator.href.startswith('logseq://') diff --git a/src/promnesia/tests/sources/test_filetypes.py b/src/promnesia/tests/sources/test_filetypes.py new file mode 100644 index 00000000..6c47babc --- /dev/null +++ b/src/promnesia/tests/sources/test_filetypes.py @@ -0,0 +1,42 @@ +from pathlib import Path + +from ...common import PathIsh, _is_windows as windows +from ...sources.auto import by_path + + +def handled(p: PathIsh) -> bool: + idx, m = by_path(Path(p)) + return idx is not None + # ideally these won't hit libmagic path (would try to open the file and cause FileNotFoundError) + + +def test_filetypes() -> None: + # test media + for ext in 'avi mp4 mp3 webm'.split() + ([] if windows else 'mkv'.split()): + assert handled('file.' + ext) + + # images + for ext in 'gif jpg png jpeg'.split(): + assert handled('file.' + ext) + + # TODO more granual checks that these are ignored? + # binaries + for ext in 'o sqlite'.split() + ([] if windows else 'class jar'.split()): + assert handled('file.' + ext) + + # these might have potentially some links + for ext in [ + 'svg', + 'pdf', 'epub', 'ps', + 'doc', 'ppt', 'xsl', + # seriously, windows doesn't know about docx??? + *([] if windows else 'docx pptx xlsx'.split()), + *([] if windows else 'ods odt rtf'.split()), + ] + ([] if windows else 'djvu'.split()): + assert handled('file.' + ext) + + # source code + for ext in 'rs tex el js sh hs pl h py hpp c go css'.split() + ([] if windows else 'java cpp'.split()): + assert handled('file.' + ext) + + assert handled('x.html') diff --git a/tests/test_org_indexer.py b/src/promnesia/tests/sources/test_org.py similarity index 82% rename from tests/test_org_indexer.py rename to src/promnesia/tests/sources/test_org.py index 10d9f908..fc88deed 100644 --- a/tests/test_org_indexer.py +++ b/src/promnesia/tests/sources/test_org.py @@ -1,11 +1,12 @@ from typing import Optional -from promnesia.common import Visit -from promnesia.sources.org import extract_from_file +from ...common import Visit +from ...sources.org import extract_from_file -from common import tdata, throw +from ..common import get_testdata, throw -def declrf(s: Optional[str]) -> Optional[str]: + +def delrf(s: Optional[str]) -> Optional[str]: if s is None: return None # meh.. not sure how ot handle this properly, ideally should be via pytest? @@ -14,7 +15,7 @@ def declrf(s: Optional[str]) -> Optional[str]: def test_org_indexer() -> None: - [_, cpp, cozy] = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file.org'))] + [_, cpp, cozy] = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))] assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc' # TODO not sure about filetags? @@ -24,13 +25,13 @@ def test_org_indexer() -> None: https://www.youtube.com/watch?v=rHIkrotSwcc) very much. '''.lstrip() - assert declrf(cpp.context) == exp + assert delrf(cpp.context) == exp assert cozy.url == 'https://twitter.com/Mappletons/status/1255221220263563269' def test_org_indexer_2() -> None: - items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file3.org'))] + items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))] assert len(items) == 6 assert items[0].url == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i' @@ -41,7 +42,7 @@ def test_org_indexer_2() -> None: def test_heading() -> None: - items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file2.org'))] + items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file2.org'))] assert {i.url for i in items} == { 'https://en.wikipedia.org/wiki/Computational_topology', 'http://graphics.stanford.edu/courses/cs468-09-fall/', @@ -51,7 +52,7 @@ def test_heading() -> None: def test_url_in_properties() -> None: - items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file4.org'))] + items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file4.org'))] assert len(items) == 2, items assert items[0].url == 'https://example.org/ref_example' diff --git a/tests/cannon.py b/src/promnesia/tests/test_cannon.py similarity index 99% rename from tests/cannon.py rename to src/promnesia/tests/test_cannon.py index 53cbe32b..6921caa8 100644 --- a/tests/cannon.py +++ b/src/promnesia/tests/test_cannon.py @@ -1,8 +1,8 @@ from typing import cast -import pytest # type: ignore +import pytest -from promnesia.cannon import canonify, CanonifyException +from ..cannon import canonify, CanonifyException # TODO should actually understand 'sequences'? # e.g. diff --git a/tests/cli.py b/src/promnesia/tests/test_cli.py similarity index 58% rename from tests/cli.py rename to src/promnesia/tests/test_cli.py index cf18a23d..bf9bbabd 100644 --- a/tests/cli.py +++ b/src/promnesia/tests/test_cli.py @@ -1,21 +1,15 @@ -from promnesia.common import _is_windows - -from common import tmp_popen, promnesia_bin - -from pathlib import Path import os -from subprocess import Popen import time +from ..common import _is_windows + +from .common import get_testdata, promnesia_bin, tmp_popen + import pytest +import requests -def ox_hugo_data() -> Path: - p = Path('tests/testdata/ox-hugo/test/site') - if not p.exists(): - raise RuntimeError(f"'{p}' not found! You propably need to run 'git submodule update --init --recursive'") - assert p.exists(), p - return p +ox_hugo_data = get_testdata('ox-hugo/test/site') def test_demo() -> None: @@ -24,10 +18,9 @@ def test_demo() -> None: # not sure maybe something with port choice idk pytest.skip("TODO broken on Windows") - import requests - with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data())): - # FIXME why does it want post?? - time.sleep(2) # meh.. need a generic helper to wait till ready... + with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data)): + # TODO why does it want post?? + time.sleep(2) # meh.. need a generic helper to wait till ready... res = {} for attempt in range(30): time.sleep(1) @@ -43,7 +36,7 @@ def test_demo() -> None: raise RuntimeError("Couldn't connect to the server") vis = res['visits'] assert len(vis) > 50, vis - mds = [x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)] + mds = [x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)] orgs = [x for x in vis if x['locator']['title'].startswith('content-org/single-posts/empty_tag.org'.replace('/', os.sep))] - assert len(mds ) == 1 + assert len(mds) == 1 assert len(orgs) == 1 diff --git a/src/promnesia/tests/test_extract_urls.py b/src/promnesia/tests/test_extract_urls.py new file mode 100644 index 00000000..f72aaccf --- /dev/null +++ b/src/promnesia/tests/test_extract_urls.py @@ -0,0 +1,43 @@ +from ..common import extract_urls + + +def test_extract_simple() -> None: + lines = """ + I've enjoyed [Chandler Carruth's _There Are No Zero-cost Abstractions_]( + https://www.youtube.com/watch?v=rHIkrotSwcc) very much. +""".strip() + assert set(extract_urls(lines)) == {'https://www.youtube.com/watch?v=rHIkrotSwcc'} + + +def test_extract_2() -> None: + text = '''‍♂️ Чтобы снизить вероятность ошибиться, важно знать про когнитивные искажения. + Если для вас это новое словосочетание, начните с книжки + "Гарри Поттер и Методы рационального мышления" - http://hpmor.ru/, если вы знакомы с понятием - читайте цепочки на сайтах + lesswrong.ru и lesswrong.com, книжку Даниэля Канемана "Thinking, fast and slow" и канал Пион https://t.me/ontologics + ''' + assert set(extract_urls(text)) == {'http://hpmor.ru/', 'lesswrong.ru', 'lesswrong.com', 'https://t.me/ontologics'} + + +def test_extract_md() -> None: + lines = ''' +Hey, I recently implemented a new extension for that [addons.mozilla.org](https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/), [github](https://github.com/karlicoss/grasp), perhaps it could be useful for you! + ''' + assert set(extract_urls(lines)) == { + 'addons.mozilla.org', + 'https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/', + 'https://github.com/karlicoss/grasp', + } + + +# just random links to test multiline/whitespace behaviour +def test_extract_3() -> None: + lines = ''' +python.org/one.html ?? https://python.org/two.html some extra text + + whatever.org + ''' + assert set(extract_urls(lines, syntax='org')) == { + 'python.org/one.html', + 'https://python.org/two.html', + 'whatever.org', + } diff --git a/tests/test_traverse.py b/src/promnesia/tests/test_traverse.py similarity index 77% rename from tests/test_traverse.py rename to src/promnesia/tests/test_traverse.py index 4a750e3d..1153a297 100644 --- a/tests/test_traverse.py +++ b/src/promnesia/tests/test_traverse.py @@ -1,43 +1,41 @@ -from pathlib import Path -from promnesia.common import traverse -from unittest.mock import Mock, patch -from common import DATA +from unittest.mock import patch +from ..common import traverse + +from .common import get_testdata + + +testDataPath = get_testdata('traverse') -testDataPath = Path(DATA) / 'traverse' # Patch shutil.which so it always returns false (when trying to which fdfind, etc) # so that it falls back to find @patch('promnesia.common.shutil.which', return_value=False) -def test_traverse_ignore_find(patched): +def test_traverse_ignore_find(patched) -> None: ''' traverse() with `find` but ignore some stuff ''' - # act paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2'])) - # assert assert paths == {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'} + def test_traverse_ignore_fdfind(): ''' traverse() with `fdfind` but ignore some stuff ''' - # act paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2'])) - # assert assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'} + # TODO: It would be nice to test the implementation directly without having to do this # weird patching in the future @patch('promnesia.common._is_windows', new_callable=lambda: True) -def test_traverse_ignore_windows(patched): +def test_traverse_ignore_windows(patched) -> None: ''' traverse() with python when _is_windows is true but ignore some stuff ''' - # act paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2'])) - # assert assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'} diff --git a/tests/test_misc.py b/tests/test_misc.py deleted file mode 100644 index 6e1a2819..00000000 --- a/tests/test_misc.py +++ /dev/null @@ -1,84 +0,0 @@ -from pathlib import Path -from promnesia.common import extract_urls - -def test_extract(): - lines = """ - I've enjoyed [Chandler Carruth's _There Are No Zero-cost Abstractions_]( - https://www.youtube.com/watch?v=rHIkrotSwcc) very much. -""".strip() - assert set(extract_urls(lines)) == {'https://www.youtube.com/watch?v=rHIkrotSwcc'} - - -def test_extract_2(): - text = '''‍♂️ Чтобы снизить вероятность ошибиться, важно знать про когнитивные искажения. - Если для вас это новое словосочетание, начните с книжки - "Гарри Поттер и Методы рационального мышления" - http://hpmor.ru/, если вы знакомы с понятием - читайте цепочки на сайтах - lesswrong.ru и lesswrong.com, книжку Даниэля Канемана "Thinking, fast and slow" и канал Пион https://t.me/ontologics - ''' - assert set(extract_urls(text)) == {'http://hpmor.ru/', 'lesswrong.ru', 'lesswrong.com', 'https://t.me/ontologics'} - -def test_extract_md(): - lines = ''' -Hey, I recently implemented a new extension for that [addons.mozilla.org](https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/), [github](https://github.com/karlicoss/grasp), perhaps it could be useful for you! - ''' - assert set(extract_urls(lines)) == { - 'addons.mozilla.org', - 'https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/', - 'https://github.com/karlicoss/grasp', - } - - -# just random links to test multiline/whitespace behaviour -def test_extract_3() -> None: - lines = ''' -python.org/one.html ?? https://python.org/two.html some extra text - - whatever.org - ''' - assert set(extract_urls(lines, syntax='org')) == { - 'python.org/one.html', - 'https://python.org/two.html', - 'whatever.org', - } - - -from promnesia.common import PathIsh, _is_windows as windows -from promnesia.sources.auto import by_path - - -def handled(p: PathIsh) -> bool: - idx, m = by_path(Path(p)) - return idx is not None - # ideally these won't hit libmagic path (would try to open the file and cause FileNotFoundError) - - -def test_filetypes() -> None: - # test media - for ext in 'avi mp4 mp3 webm'.split() + ([] if windows else 'mkv'.split()): - assert handled('file.' + ext) - - # images - for ext in 'gif jpg png jpeg'.split(): - assert handled('file.' + ext) - - # TODO more granual checks that these are ignored? - # binaries - for ext in 'o sqlite'.split() + ([] if windows else 'class jar'.split()): - assert handled('file.' + ext) - - # these might have potentially some links - for ext in [ - 'svg', - 'pdf', 'epub', 'ps', - 'doc', 'ppt', 'xsl', - # seriously, windows doesn't know about docx??? - *([] if windows else 'docx pptx xlsx'.split()), - *([] if windows else 'ods odt rtf'.split()), - ] + ([] if windows else 'djvu'.split()): - assert handled('file.' + ext) - - # source code - for ext in 'rs tex el js sh hs pl h py hpp c go css'.split() + ([] if windows else 'java cpp'.split()): - assert handled('file.' + ext) - - assert handled('x.html')