From 3355e6c7c790835d95a4c646265e28375f5ad52b Mon Sep 17 00:00:00 2001
From: Dima Gerasimov <karlicoss@gmail.com>
Date: Wed, 22 Nov 2023 00:32:58 +0000
Subject: [PATCH] tests: move more tests inside packages; clean up

---
 src/promnesia/tests/common.py                 | 48 +++++++++++
 src/promnesia/tests/sources/__init__.py       |  0
 .../promnesia/tests/sources/test_auto.py      | 19 ++---
 src/promnesia/tests/sources/test_filetypes.py | 42 ++++++++++
 .../promnesia/tests/sources/test_org.py       | 19 +++--
 .../promnesia/tests/test_cannon.py            |  4 +-
 .../cli.py => src/promnesia/tests/test_cli.py | 29 +++----
 src/promnesia/tests/test_extract_urls.py      | 43 ++++++++++
 .../promnesia/tests}/test_traverse.py         | 24 +++---
 tests/test_misc.py                            | 84 -------------------
 10 files changed, 176 insertions(+), 136 deletions(-)
 create mode 100644 src/promnesia/tests/sources/__init__.py
 rename tests/test_auto_indexer.py => src/promnesia/tests/sources/test_auto.py (79%)
 create mode 100644 src/promnesia/tests/sources/test_filetypes.py
 rename tests/test_org_indexer.py => src/promnesia/tests/sources/test_org.py (82%)
 rename tests/cannon.py => src/promnesia/tests/test_cannon.py (99%)
 rename tests/cli.py => src/promnesia/tests/test_cli.py (58%)
 create mode 100644 src/promnesia/tests/test_extract_urls.py
 rename {tests => src/promnesia/tests}/test_traverse.py (77%)
 delete mode 100644 tests/test_misc.py

diff --git a/src/promnesia/tests/common.py b/src/promnesia/tests/common.py
index fe2f86e1..0bf6c928 100644
--- a/src/promnesia/tests/common.py
+++ b/src/promnesia/tests/common.py
@@ -1,9 +1,18 @@
+from contextlib import contextmanager
 import gc
 import os
+from pathlib import Path
+import sys
 from typing import NoReturn
 
 import pytest
 
+from ..common import _is_windows
+
+
+def under_ci() -> bool:  # checked on every call, unlike the import-time `running_on_ci` flag further down
+    return 'CI' in os.environ  # only the variable's presence matters, not its value
+
 
 def throw(x: Exception) -> NoReturn:
     '''
@@ -27,3 +36,42 @@ def gc_control(gc_on: bool):
 
 
 running_on_ci = 'CI' in os.environ
+
+
+GIT_ROOT = Path(__file__).absolute().parent.parent.parent.parent
+TESTDATA = GIT_ROOT / 'tests/testdata'
+
+
+def get_testdata(path: str) -> Path:  # resolve `path` relative to TESTDATA; raises RuntimeError if it is missing
+    assert TESTDATA.is_dir(), TESTDATA  # the testdata root itself must be present in the checkout
+    res = TESTDATA / path
+    if not res.exists():  # per the message below, the usual cause is git submodules that were never fetched
+        raise RuntimeError(f"'{res}' not found! You probably need to run 'git submodule update --init --recursive'")
+    return res
+
+
+@contextmanager
+def tmp_popen(*args, **kwargs):  # args/kwargs are forwarded to psutil.Popen unchanged
+    import psutil # type: ignore
+    with psutil.Popen(*args, **kwargs) as p:
+        try:
+            yield p  # hand the process object to the with-body
+        finally:  # always tear the process tree down, even if the with-body raised
+            for c in p.children(recursive=True):  # descendants first, then the process itself
+                c.kill()
+            p.kill()
+            p.wait()  # block until the process has actually exited
+
+# Build the argv list the tests use to launch the promnesia CLI; *args are appended as CLI arguments.
+def promnesia_bin(*args):
+    # On CI and on Windows, run the installed package through the current interpreter:
+    # the in-repo launcher is a bash script, and there is no bash on Windows.
+    # Not sure it's a good idea to diverge like this, but there's no obviously better way either.
+    if under_ci() or _is_windows:
+        # should be able to use the installed version
+        return [sys.executable, '-m', 'promnesia', *args]
+    else:
+        # use the launcher script from the repository checkout; this module now lives in
+        # src/promnesia/tests, so Path(__file__).parent.parent is no longer the repo root -- use GIT_ROOT
+        pm = GIT_ROOT / 'scripts/promnesia'
+        return [pm, *args]
diff --git a/src/promnesia/tests/sources/__init__.py b/src/promnesia/tests/sources/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_auto_indexer.py b/src/promnesia/tests/sources/test_auto.py
similarity index 79%
rename from tests/test_auto_indexer.py
rename to src/promnesia/tests/sources/test_auto.py
index 6bb7108e..b1e72ea2 100644
--- a/tests/test_auto_indexer.py
+++ b/src/promnesia/tests/sources/test_auto.py
@@ -1,9 +1,9 @@
 from itertools import groupby
 import os
 
-from promnesia.sources import auto
+from ...sources import auto
 
-from common import tdata, throw
+from ..common import get_testdata, throw
 
 sa2464 = 'https://www.scottaaronson.com/blog/?p=2464'
 
@@ -15,18 +15,17 @@
 
 def makemap(visits):
     key = lambda v: v.url
+
     def it():
         vit = (throw(v) if isinstance(v, Exception) else v for v in visits)
         for k, g in groupby(sorted(vit, key=key), key=key):
             yield k, list(sorted(g))
+
     return dict(it())
 
 
 def test_json() -> None:
-    mm = makemap(auto.index(
-        tdata('auto'),
-        ignored='*/orgs/*',
-    ))
+    mm = makemap(auto.index(get_testdata('auto'), ignored='*/orgs/*'))
     assert mm.keys() == _JSON_URLS
 
     # TODO not sure if they deserve separate visits..
@@ -39,7 +38,7 @@ def test_json() -> None:
 
 
 def test_auto() -> None:
-    mm = makemap(auto.index(tdata('auto')))
+    mm = makemap(auto.index(get_testdata('auto')))
     org_link = 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     assert {
         *_JSON_URLS,
@@ -47,21 +46,21 @@ def test_auto() -> None:
     }.issubset(mm.keys())
 
     [v] = mm[org_link]
-    assert v.locator.title == 'orgs' + os.sep + 'file.org:14' # meh
+    assert v.locator.title == 'orgs' + os.sep + 'file.org:14'  # meh
     assert v.locator.href.endswith('file.org:14')
     assert "xxx /r/cpp" in v.context
     assert "I've enjoyed [Chandler Carruth's" in v.context
 
 
 def test_obsidian() -> None:
-    mm = makemap(auto.index(tdata('obsidian-vault')))
+    mm = makemap(auto.index(get_testdata('obsidian-vault')))
     example_url = 'https://example.com'
     [v] = mm[example_url]
     assert v.locator.href.startswith('obsidian://')
 
 
 def test_logseq() -> None:
-    mm = makemap(auto.index(tdata('logseq-graph')))
+    mm = makemap(auto.index(get_testdata('logseq-graph')))
     example_url = 'https://example.com'
     [v] = mm[example_url]
     assert v.locator.href.startswith('logseq://')
diff --git a/src/promnesia/tests/sources/test_filetypes.py b/src/promnesia/tests/sources/test_filetypes.py
new file mode 100644
index 00000000..6c47babc
--- /dev/null
+++ b/src/promnesia/tests/sources/test_filetypes.py
@@ -0,0 +1,42 @@
+from pathlib import Path
+
+from ...common import PathIsh, _is_windows as windows
+from ...sources.auto import by_path
+
+
+def handled(p: PathIsh) -> bool:  # True when by_path() yields a non-None first element for this path
+    # ideally these won't hit libmagic path (would try to open the file and cause FileNotFoundError)
+    idx, _ = by_path(Path(p))  # the second element of the pair is not needed here
+    return idx is not None
+
+
+def test_filetypes() -> None:  # only file names are passed in; this test creates no files
+    # media (the extra extension is only asserted when not on Windows)
+    for ext in 'avi mp4 mp3 webm'.split() + ([] if windows else 'mkv'.split()):
+        assert handled('file.' + ext)
+
+    # images
+    for ext in 'gif jpg png jpeg'.split():
+        assert handled('file.' + ext)
+
+    # TODO more granular checks that these are ignored?
+    # binaries
+    for ext in 'o sqlite'.split() + ([] if windows else 'class jar'.split()):
+        assert handled('file.' + ext)
+
+    # these might potentially contain some links
+    for ext in [
+        'svg',
+        'pdf', 'epub', 'ps',
+        'doc', 'ppt', 'xsl',  # NOTE(review): 'xsl' is possibly a typo for 'xls' -- confirm
+        # seriously, windows doesn't know about docx???
+        *([] if windows else 'docx pptx xlsx'.split()),
+        *([] if windows else 'ods odt rtf'.split()),
+    ] + ([] if windows else 'djvu'.split()):
+        assert handled('file.' + ext)
+
+    # source code
+    for ext in 'rs tex el js sh hs pl h py hpp c go css'.split() + ([] if windows else 'java cpp'.split()):
+        assert handled('file.' + ext)
+
+    assert handled('x.html')
diff --git a/tests/test_org_indexer.py b/src/promnesia/tests/sources/test_org.py
similarity index 82%
rename from tests/test_org_indexer.py
rename to src/promnesia/tests/sources/test_org.py
index 10d9f908..fc88deed 100644
--- a/tests/test_org_indexer.py
+++ b/src/promnesia/tests/sources/test_org.py
@@ -1,11 +1,12 @@
 from typing import Optional
 
-from promnesia.common import Visit
-from promnesia.sources.org import extract_from_file
+from ...common import Visit
+from ...sources.org import extract_from_file
 
-from common import tdata, throw
+from ..common import get_testdata, throw
 
-def declrf(s: Optional[str]) -> Optional[str]:
+
+def delrf(s: Optional[str]) -> Optional[str]:
     if s is None:
         return None
     # meh.. not sure how ot handle this properly, ideally should be via pytest?
@@ -14,7 +15,7 @@ def declrf(s: Optional[str]) -> Optional[str]:
 
 
 def test_org_indexer() -> None:
-    [_, cpp, cozy] = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file.org'))]
+    [_, cpp, cozy] = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))]
 
     assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     # TODO not sure about filetags?
@@ -24,13 +25,13 @@ def test_org_indexer() -> None:
  https://www.youtube.com/watch?v=rHIkrotSwcc) very much.
 
 '''.lstrip()
-    assert declrf(cpp.context) == exp
+    assert delrf(cpp.context) == exp
 
     assert cozy.url == 'https://twitter.com/Mappletons/status/1255221220263563269'
 
 
 def test_org_indexer_2() -> None:
-    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file3.org'))]
+    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))]
 
     assert len(items) == 6
     assert items[0].url == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
@@ -41,7 +42,7 @@ def test_org_indexer_2() -> None:
 
 
 def test_heading() -> None:
-    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file2.org'))]
+    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file2.org'))]
     assert {i.url for i in items} == {
         'https://en.wikipedia.org/wiki/Computational_topology',
         'http://graphics.stanford.edu/courses/cs468-09-fall/',
@@ -51,7 +52,7 @@ def test_heading() -> None:
 
 
 def test_url_in_properties() -> None:
-    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(tdata('auto/orgs/file4.org'))]
+    items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file4.org'))]
 
     assert len(items) == 2, items
     assert items[0].url == 'https://example.org/ref_example'
diff --git a/tests/cannon.py b/src/promnesia/tests/test_cannon.py
similarity index 99%
rename from tests/cannon.py
rename to src/promnesia/tests/test_cannon.py
index 53cbe32b..6921caa8 100644
--- a/tests/cannon.py
+++ b/src/promnesia/tests/test_cannon.py
@@ -1,8 +1,8 @@
 from typing import cast
 
-import pytest # type: ignore
+import pytest
 
-from promnesia.cannon import canonify, CanonifyException
+from ..cannon import canonify, CanonifyException
 
 # TODO should actually understand 'sequences'?
 # e.g.
diff --git a/tests/cli.py b/src/promnesia/tests/test_cli.py
similarity index 58%
rename from tests/cli.py
rename to src/promnesia/tests/test_cli.py
index cf18a23d..bf9bbabd 100644
--- a/tests/cli.py
+++ b/src/promnesia/tests/test_cli.py
@@ -1,21 +1,15 @@
-from promnesia.common import _is_windows
-
-from common import tmp_popen, promnesia_bin
-
-from pathlib import Path
 import os
-from subprocess import Popen
 import time
 
+from ..common import _is_windows
+
+from .common import get_testdata, promnesia_bin, tmp_popen
+
 import pytest
+import requests
 
 
-def ox_hugo_data() -> Path:
-    p = Path('tests/testdata/ox-hugo/test/site')
-    if not p.exists():
-        raise RuntimeError(f"'{p}' not found! You propably need to run 'git submodule update --init --recursive'")
-    assert p.exists(), p
-    return p
+ox_hugo_data = get_testdata('ox-hugo/test/site')  # NOTE(review): resolved at import time, so missing testdata raises when this module is imported, not inside test_demo
 
 
 def test_demo() -> None:
@@ -24,10 +18,9 @@ def test_demo() -> None:
         # not sure maybe something with port choice idk
         pytest.skip("TODO broken on Windows")
 
-    import requests
-    with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data())):
-        # FIXME why does it want post??
-        time.sleep(2) # meh.. need a generic helper to wait till ready...
+    with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data)):
+        # TODO why does it want post??
+        time.sleep(2)  # meh.. need a generic helper to wait till ready...
         res = {}
         for attempt in range(30):
             time.sleep(1)
@@ -43,7 +36,7 @@ def test_demo() -> None:
             raise RuntimeError("Couldn't connect to the server")
         vis = res['visits']
         assert len(vis) > 50, vis
-        mds  = [x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)]
+        mds = [x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)]
         orgs = [x for x in vis if x['locator']['title'].startswith('content-org/single-posts/empty_tag.org'.replace('/', os.sep))]
-        assert len(mds ) == 1
+        assert len(mds) == 1
         assert len(orgs) == 1
diff --git a/src/promnesia/tests/test_extract_urls.py b/src/promnesia/tests/test_extract_urls.py
new file mode 100644
index 00000000..f72aaccf
--- /dev/null
+++ b/src/promnesia/tests/test_extract_urls.py
@@ -0,0 +1,43 @@
+from ..common import extract_urls
+
+
+def test_extract_simple() -> None:  # markdown link whose target starts on the line after the opening paren
+    lines = """
+ I've enjoyed [Chandler Carruth's _There Are No Zero-cost Abstractions_](
+ https://www.youtube.com/watch?v=rHIkrotSwcc) very much.
+""".strip()
+    assert set(extract_urls(lines)) == {'https://www.youtube.com/watch?v=rHIkrotSwcc'}
+
+
+def test_extract_2() -> None:  # non-ASCII prose; scheme-less domains are expected in the result too
+    text = '''‍♂️ Чтобы снизить вероятность ошибиться, важно знать про когнитивные искажения.
+    Если для вас это новое словосочетание, начните с книжки
+    "Гарри Поттер и Методы рационального мышления" - http://hpmor.ru/, если вы знакомы с понятием - читайте цепочки на сайтах
+    lesswrong.ru и lesswrong.com, книжку Даниэля Канемана "Thinking, fast and slow" и канал Пион https://t.me/ontologics
+    '''
+    assert set(extract_urls(text)) == {'http://hpmor.ru/', 'lesswrong.ru', 'lesswrong.com', 'https://t.me/ontologics'}
+
+
+def test_extract_md() -> None:  # both the bare-domain link text and the link targets are expected
+    lines = '''
+Hey, I recently implemented a new extension for that [addons.mozilla.org](https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/), [github](https://github.com/karlicoss/grasp), perhaps it could be useful for you!
+    '''
+    assert set(extract_urls(lines)) == {
+        'addons.mozilla.org',
+        'https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/',
+        'https://github.com/karlicoss/grasp',
+    }
+
+
+# arbitrary links spread over several lines, to exercise multiline/whitespace behaviour with syntax='org'
+def test_extract_3() -> None:
+    lines = '''
+python.org/one.html ?? https://python.org/two.html some extra text
+
+    whatever.org
+    '''
+    assert set(extract_urls(lines, syntax='org')) == {
+        'python.org/one.html',
+        'https://python.org/two.html',
+        'whatever.org',
+    }
diff --git a/tests/test_traverse.py b/src/promnesia/tests/test_traverse.py
similarity index 77%
rename from tests/test_traverse.py
rename to src/promnesia/tests/test_traverse.py
index 4a750e3d..1153a297 100644
--- a/tests/test_traverse.py
+++ b/src/promnesia/tests/test_traverse.py
@@ -1,43 +1,41 @@
-from pathlib import Path
-from promnesia.common import traverse
-from unittest.mock import Mock, patch
-from common import DATA
+from unittest.mock import patch
 
+from ..common import traverse
+
+from .common import get_testdata
+
+
+testDataPath = get_testdata('traverse')  # NOTE(review): resolved at import time, so missing testdata raises when this module is imported
 
-testDataPath = Path(DATA) / 'traverse'
 
 # Patch shutil.which so it always returns false (when trying to which fdfind, etc)
 # so that it falls back to find
 @patch('promnesia.common.shutil.which', return_value=False)
-def test_traverse_ignore_find(patched):
+def test_traverse_ignore_find(patched) -> None:
     '''
     traverse() with `find` but ignore some stuff
     '''
-    # act
     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
 
-    # assert
     assert paths == {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}
 
+
 def test_traverse_ignore_fdfind():
     '''
     traverse() with `fdfind` but ignore some stuff
     '''
-    # act
     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
 
-    # assert
     assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
 
+
 # TODO: It would be nice to test the implementation directly without having to do this
 # weird patching in the future
 @patch('promnesia.common._is_windows', new_callable=lambda: True)
-def test_traverse_ignore_windows(patched):
+def test_traverse_ignore_windows(patched) -> None:
     '''
     traverse() with python when _is_windows is true but ignore some stuff
     '''
-    # act
     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
 
-    # assert
     assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
diff --git a/tests/test_misc.py b/tests/test_misc.py
deleted file mode 100644
index 6e1a2819..00000000
--- a/tests/test_misc.py
+++ /dev/null
@@ -1,84 +0,0 @@
-from pathlib import Path
-from promnesia.common import extract_urls
-
-def test_extract():
-    lines = """
- I've enjoyed [Chandler Carruth's _There Are No Zero-cost Abstractions_](
- https://www.youtube.com/watch?v=rHIkrotSwcc) very much.
-""".strip()
-    assert set(extract_urls(lines)) == {'https://www.youtube.com/watch?v=rHIkrotSwcc'}
-
-
-def test_extract_2():
-   text = '''‍♂️ Чтобы снизить вероятность ошибиться, важно знать про когнитивные искажения.
-   Если для вас это новое словосочетание, начните с книжки
-   "Гарри Поттер и Методы рационального мышления" - http://hpmor.ru/, если вы знакомы с понятием - читайте цепочки на сайтах
-   lesswrong.ru и lesswrong.com, книжку Даниэля Канемана "Thinking, fast and slow" и канал Пион https://t.me/ontologics
-   '''
-   assert set(extract_urls(text)) == {'http://hpmor.ru/', 'lesswrong.ru', 'lesswrong.com', 'https://t.me/ontologics'}
-
-def test_extract_md():
-    lines = '''
-Hey, I recently implemented a new extension for that [addons.mozilla.org](https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/), [github](https://github.com/karlicoss/grasp), perhaps it could be useful for you!
-    '''
-    assert set(extract_urls(lines)) == {
-        'addons.mozilla.org',
-        'https://addons.mozilla.org/en-US/firefox/addon/org-grasp-for-org-capture/',
-        'https://github.com/karlicoss/grasp',
-    }
-
-
-# just random links to test multiline/whitespace behaviour
-def test_extract_3() -> None:
-    lines = '''
-python.org/one.html ?? https://python.org/two.html some extra text
-
-    whatever.org
-    '''
-    assert set(extract_urls(lines, syntax='org')) == {
-        'python.org/one.html',
-        'https://python.org/two.html',
-        'whatever.org',
-    }
-
-
-from promnesia.common import PathIsh, _is_windows as windows
-from promnesia.sources.auto import by_path
-
-
-def handled(p: PathIsh) -> bool:
-    idx, m = by_path(Path(p))
-    return idx is not None
-    # ideally these won't hit libmagic path (would try to open the file and cause FileNotFoundError)
-
-
-def test_filetypes() -> None:
-    # test media
-    for ext in 'avi mp4 mp3 webm'.split() + ([] if windows else 'mkv'.split()):
-        assert handled('file.' + ext)
-
-    # images
-    for ext in 'gif jpg png jpeg'.split():
-        assert handled('file.' + ext)
-
-    # TODO more granual checks that these are ignored?
-    # binaries
-    for ext in 'o sqlite'.split() + ([] if windows else 'class jar'.split()):
-        assert handled('file.' + ext)
-
-    # these might have potentially some links
-    for ext in [
-            'svg',
-            'pdf', 'epub', 'ps',
-            'doc', 'ppt', 'xsl',
-            # seriously, windows doesn't know about docx???
-            *([] if windows else 'docx pptx xlsx'.split()),
-            *([] if windows else 'ods odt rtf'.split()),
-    ] + ([] if windows else 'djvu'.split()): 
-        assert handled('file.' + ext)
-
-    # source code
-    for ext in 'rs tex el js sh hs pl h py hpp c go css'.split() + ([] if windows else 'java cpp'.split()):
-        assert handled('file.' + ext)
-
-    assert handled('x.html')