Merge pull request #42 from karlicoss/updates

cleanup, move stuff to my.core, update docs
karlicoss · May 6, 2020 · 40b6a82 · 40b6a82
2 parents 5d3c0bd + d4a430e
commit 40b6a82
Show file tree

Hide file tree

Showing 26 changed files with 469 additions and 427 deletions.
diff --git a/README.org b/README.org
@@ -5,6 +5,11 @@
 
 #+macro: map      @@html:<span style='color:darkgreen; font-weight: bolder'>@@$1@@html:</span>@@
 
+If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
+
+- see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP]] for the *installation/configuration guide*
+- see [[https://github.com/karlicoss/HPI/tree/master/doc/DEVELOPMENT.org][DEVELOPMENT]] for the *development guide*
+
 *TLDR*: I'm using [[https://github.com/karlicoss/HPI][HPI]] (Human Programming Interface) package as a means of unifying, accessing and interacting with all of my personal data.
 
 It's a Python library (named ~my~), a collection of modules for:
@@ -48,11 +53,6 @@ and that's why I'm sharing this.
 Imagine if all your life was reflected digitally and available at your fingertips.
 This library is my attempt to achieve this vision.
 
-If you're in a hurry, feel free to jump straight to the [[#usecases][demos]].
-
-For *installation/configuration/development guide*, see [[https://github.com/karlicoss/HPI/tree/master/doc/SETUP.org][SETUP.org]].
-
-
 #+toc: headlines 2
 
 
@@ -593,4 +593,4 @@ In some near future I will write more about:
 - challenges I had to solve
 - more use-cases and demos -- it's impossible to fit everything in one post!
 
-, but happy to answer any questions on these topics now!
+, but happy to answer any questions on these topics now!
diff --git a/doc/DEVELOPMENT.org b/doc/DEVELOPMENT.org
@@ -1,13 +1,45 @@
+* Running tests
+I'm using =tox= to run tests and linters. You can check out the [[file:../.github/workflows/main.yml][GitHub Actions]] config
+and [[file:../scripts/ci/run]] for up-to-date info on the specifics.
+
 * IDE setup: make sure my.config is in your package search path
 In runtime, ~my.config~ is imported from the user config directory dynamically.
 
 However, Pycharm/Emacs/whatever you use won't be able to figure that out, so you'd need to adjust your IDE configuration.
 
-- Pycharm: basically, follow the instruction [[https://stackoverflow.com/a/55278260/706389][here]]
+- Pycharm: basically, follow the instructions [[https://stackoverflow.com/a/55278260/706389][here]]
 
   i.e. create a new interpreter configuration (e.g. name it "Python 3.7 (for HPI)"), and add =~/.config/my=.
 
 * Linting
-You should be able to use ~./lint~ script to run mypy checks.
+You should be able to use the [[file:../lint]] script to run mypy checks.
+
+[[file:../mypy.ini]] points at =~/.config/my= by default.
+
+
+* Modifying/adding modules
+
+The easiest way is just to run HPI via the [[file:SETUP.org::#use-without-installing][with_my]] wrapper or with an editable pip install.
+That way your changes will be reflected immediately, and you will be able to quickly iterate/fix bugs/add new methods.
+
+The "proper way" (unless you want to contribute to the upstream) is to create a separate hierarchy and add your module to =PYTHONPATH=.
+
+For example, if you want to add an =awesomedatasource=, it could be:
+
+: custom_module
+: └── my
+:     └── awesomedatasource.py
+
+You can use all existing HPI modules in =awesomedatasource.py=, for example, =my.config=, or everything from =my.core=.
+
+You can also use all the previously defined HPI modules. This could be useful to *shadow/override* an existing HPI module:
+
+: custom_reddit_overlay
+: └── my
+:     └── reddit.py
+
+Now if you add =custom_reddit_overlay= *to the front* of ~PYTHONPATH~, all the downstream scripts using =my.reddit= will load it from =custom_reddit_overlay= instead.
+
+This could be useful to monkey patch some behaviours, or dynamically add some extra data sources -- anything that comes to your mind.
 
-~mypy.ini~ file points at =~/.config/my= by default.
+I'll put up a better guide on this; in the meantime, see [[https://packaging.python.org/guides/packaging-namespace-packages]["namespace packages"]] for more info.
diff --git a/lint b/lint
@@ -31,25 +31,29 @@ def package_name(p: Path) -> str:
     else:
         return mname(p)
 
+def subpackages(package: str) -> Iterable[str]:
+    ppath = package.replace('.', '/')
+    yield from sorted({
+        package_name(p.relative_to(DIR)) for p in (DIR / ppath).rglob('*.py')
+    })
+
+
 # TODO meh.. think how to check _everything_ on CI
 def core_modules() -> Iterable[str]:
     return [
-        'my.common',
+        *subpackages('my.core'),
+        *subpackages('my.kython'),
         'my.config',
-        'my.core',
         'my.cfg',
-        'my.error',
-        'my.init',
         'tests/misc.py',
         'tests/get_files.py',
         # 'tests/config.py', TODO hmm. unclear how to type check this module
     ]
 
 
+
 def all_modules() -> Iterable[str]:
-    yield from sorted(set(
-        package_name(p.relative_to(DIR)) for p in (DIR / 'my').rglob('*.py')
-    ))
+    yield from subpackages('my')
     yield from sorted(
         str(f.relative_to(DIR)) for f in (DIR / 'tests').rglob('*.py')
     )
@@ -63,11 +67,13 @@ def pylint():
 
 def mypy(thing: str):
     is_package = Path(thing).suffix != '.py'
-    return run([
+    cmd = [
         'mypy',
         '--color-output', # TODO eh? doesn't work..
         *(['-p'] if is_package else []), thing,
-    ], stdout=PIPE, stderr=PIPE)
+    ]
+    print(' '.join(cmd), file=sys.stderr)
+    return run(cmd, stdout=PIPE, stderr=PIPE)
 
 
 def mypy_all() -> Iterable[Exception]:

diff --git a/my/books/kobo.py b/my/books/kobo.py
@@ -1,8 +1,6 @@
 """
 [[https://uk.kobobooks.com/products/kobo-aura-one][Kobo]] e-ink reader: annotations and reading stats
 """
-from .. import init
-
 from typing import Callable, Union, List
 
 from my.config import kobo as config

diff --git a/my/calendar/holidays.py b/my/calendar/holidays.py
@@ -13,7 +13,7 @@
 
 # pip3 install workalendar
 from workalendar.europe import UnitedKingdom # type: ignore
-cal = UnitedKingdom() # TODO FIXME specify in config
+cal = UnitedKingdom() # TODO
 # TODO that should depend on country/'location' of residence I suppose?
 
 

diff --git a/my/cfg.py b/my/cfg.py
@@ -12,15 +12,12 @@
       export_path='/path/to/twitter/exports',
   )
 """
-# TODO later, If I have config stubs that might be unnecessary too..
-
-from . import init
-
+# todo why do we bring this into scope? don't remember..
 import my.config as config
 
 
 def set_repo(name: str, repo):
-    from .init import assign_module
+    from .core.init import assign_module
     from . common import import_from
 
     module = import_from(repo, name)

diff --git a/my/coding/codeforces.py b/my/coding/codeforces.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from .. import init
-
 from my.config import codeforces as config
 
 from datetime import datetime

diff --git a/my/coding/github.py b/my/coding/github.py
@@ -1,9 +1,6 @@
 """
 Github events and their metadata: comments/issues/pull requests
 """
-
-from .. import init
-
 from typing import Dict, Any, NamedTuple, Tuple, Optional, Iterator, TypeVar, Set
 from datetime import datetime
 import json

diff --git a/my/coding/topcoder.py b/my/coding/topcoder.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from .. import init
-
 from my.config import topcoder as config
 
 from datetime import datetime

diff --git a/my/common.py b/my/common.py
@@ -1,197 +1,2 @@
-from glob import glob as do_glob
-from pathlib import Path
-import functools
-import types
-from typing import Union, Callable, Dict, Iterable, TypeVar, Sequence, List, Optional, Any, cast, Tuple
-import warnings
-
-# some helper functions
-PathIsh = Union[Path, str]
-
-# TODO port annotations to kython?..
-def import_file(p: PathIsh, name: Optional[str]=None) -> types.ModuleType:
-    p = Path(p)
-    if name is None:
-        name = p.stem
-    import importlib.util
-    spec = importlib.util.spec_from_file_location(name, p)
-    foo = importlib.util.module_from_spec(spec)
-    loader = spec.loader; assert loader is not None
-    loader.exec_module(foo) # type: ignore[attr-defined]
-    return foo
-
-
-def import_from(path: PathIsh, name: str) -> types.ModuleType:
-    path = str(path)
-    import sys
-    try:
-        sys.path.append(path)
-        import importlib
-        return importlib.import_module(name)
-    finally:
-        sys.path.remove(path)
-
-
-T = TypeVar('T')
-K = TypeVar('K')
-V = TypeVar('V')
-
-def the(l: Iterable[T]) -> T:
-    it = iter(l)
-    try:
-        first = next(it)
-    except StopIteration as ee:
-        raise RuntimeError('Empty iterator?')
-    assert all(e == first for e in it)
-    return first
-
-
-# TODO more_itertools.bucket?
-def group_by_key(l: Iterable[T], key: Callable[[T], K]) -> Dict[K, List[T]]:
-    res: Dict[K, List[T]] = {}
-    for i in l:
-        kk = key(i)
-        lst = res.get(kk, [])
-        lst.append(i)
-        res[kk] = lst
-    return res
-
-
-def _identity(v: T) -> V:
-    return cast(V, v)
-
-def make_dict(l: Iterable[T], key: Callable[[T], K], value: Callable[[T], V]=_identity) -> Dict[K, V]:
-    res: Dict[K, V] = {}
-    for i in l:
-        k = key(i)
-        v = value(i)
-        pv = res.get(k, None) # type: ignore
-        if pv is not None:
-            raise RuntimeError(f"Duplicate key: {k}. Previous value: {pv}, new value: {v}")
-        res[k] = v
-    return res
-
-
-Cl = TypeVar('Cl')
-R = TypeVar('R')
-
-def cproperty(f: Callable[[Cl], R]) -> R:
-    return property(functools.lru_cache(maxsize=1)(f)) # type: ignore
-
-
-# https://stackoverflow.com/a/12377059/706389
-def listify(fn=None, wrapper=list):
-    """
-    Wraps a function's return value in wrapper (e.g. list)
-    Useful when an algorithm can be expressed more cleanly as a generator
-    """
-    def listify_return(fn):
-        @functools.wraps(fn)
-        def listify_helper(*args, **kw):
-            return wrapper(fn(*args, **kw))
-        return listify_helper
-    if fn is None:
-        return listify_return
-    return listify_return(fn)
-
-
-# TODO FIXME use in bluemaestro
-# def dictify(fn=None, key=None, value=None):
-#     def md(it):
-#         return make_dict(it, key=key, value=value)
-#     return listify(fn=fn, wrapper=md)
-
-
-from .kython.klogging import setup_logger, LazyLogger
-
-
-Paths = Union[Sequence[PathIsh], PathIsh]
-
-DEFAULT_GLOB = '*'
-def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
-    """
-    Helper function to avoid boilerplate.
-
-    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense
-    """
-    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
-    sources: List[Path] = []
-    if isinstance(pp, (str, Path)):
-        sources.append(Path(pp))
-    else:
-        sources.extend(map(Path, pp))
-
-    paths: List[Path] = []
-    for src in sources:
-        if src.is_dir():
-            gp: Iterable[Path] = src.glob(glob)
-            paths.extend(gp)
-        else:
-            ss = str(src)
-            if '*' in ss:
-                if glob != DEFAULT_GLOB:
-                    warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
-                paths.extend(map(Path, do_glob(ss)))
-            else:
-                assert src.is_file(), src
-                # todo assert matches glob??
-                paths.append(src)
-
-    if sort:
-        paths = list(sorted(paths))
-    return tuple(paths)
-
-
-# TODO annotate it, perhaps use 'dependent' type (for @doublewrap stuff)
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from typing import Callable, TypeVar
-    from typing_extensions import Protocol
-    # TODO reuse types from cachew? although not sure if we want hard dependency on it in typecheck time..
-    # I guess, later just define pass through once this is fixed: https://github.com/python/typing/issues/270
-    # ok, that's actually a super nice 'pattern'
-    F = TypeVar('F')
-    class McachewType(Protocol):
-        def __call__(self, cache_path: Any=None, *, hashf: Any=None, chunk_by: int=0, logger: Any=None) -> Callable[[F], F]:
-            ...
-
-    mcachew: McachewType
-
-def mcachew(*args, **kwargs): # type: ignore[no-redef]
-    """
-    Stands for 'Maybe cachew'.
-    Defensive wrapper around @cachew to make it an optional dependency.
-    """
-    try:
-        import cachew
-    except ModuleNotFoundError:
-        warnings.warn('cachew library not found. You might want to install it to speed things up. See https://github.com/karlicoss/cachew')
-        return lambda orig_func: orig_func
-    else:
-        import cachew.experimental
-        cachew.experimental.enable_exceptions()  # TODO do it only once?
-        return cachew.cachew(*args, **kwargs)
-
-
-@functools.lru_cache(1)
-def _magic():
-    import magic # type: ignore
-    return magic.Magic(mime=True)
-
-
-# TODO could reuse in pdf module?
-import mimetypes # todo do I need init()?
-# todo wtf? fastermime thinks it's mime is application/json even if the extension is xz??
-# whereas magic detects correctly: application/x-zstd and application/x-xz
-def fastermime(path: PathIsh) -> str:
-    paths = str(path)
-    # mimetypes is faster
-    (mime, _) = mimetypes.guess_type(paths)
-    if mime is not None:
-        return mime
-    # magic is slower but returns more stuff
-    # TODO Result type?; it's kinda racey, but perhaps better to let the caller decide?
-    return _magic().from_file(paths)
-
-
-Json = Dict[str, Any]
+# will be deprecated. please add stuff to my.core
+from .core.common import *
diff --git a/my/config/__init__.py b/my/config/__init__.py
@@ -1,5 +1,5 @@
 # TODO ok, this thing should trigger .cfg import presumably??
-from .. import init
+from ..core import init
 
 # TODO maybe, reuse mycfg_template here?