Skip to content

Commit

Permalink
Merge pull request #20 from karlicoss/updates
Browse files Browse the repository at this point in the history
updates
  • Loading branch information
karlicoss authored Jun 5, 2020
2 parents c8ffb93 + 85b1149 commit c021cf0
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 88 deletions.
19 changes: 13 additions & 6 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ I write in detail about usecases and motivation for it [[https://beepb00p.xyz/or
- =pip3 install --user .=
- after that you can use =python3 -m orger.modules.modulename=, same way as the previous section, or run =modules/modulename.py= directly

- [optional]: install [[https://pandoc.org/installing.html][pandoc]], it might give you better org-mode outputs for some modules

If you do have pandoc installed, but don't want the module to use it, pass the =--disable-pandoc= flag to it.

* Usage and examples
I usually run Orger modules overnight via cron.

Expand Down Expand Up @@ -89,9 +93,9 @@ print(orger.Queue.__doc__)
#+RESULTS:
:results:

*Queue* (old name =InteractiveView=): works as a queue, *only previously unseen items* from the data source are appended to the output org-mode file.
*Queue* (old name =InteractiveView=): works as a queue, *only previously unseen items* from the data source are added to the output org-mode file.

To keep track of old/new items, it's using a separate JSON =state= file.
To keep track of previously seen items, it's using a separate JSON =state= file.

A typical usecase is a todo list, or a content processing queue.
You can use such a module as you use any other org-mode file: schedule/refile/comment/set priorities, etc.
Expand All @@ -102,10 +106,13 @@ print(orger.Queue.__doc__)

You can run such a module as:

: # initialize the state file first to avoid surprises (you only need to do it once)
: ./orger_module.py --to /path/to/output.org --state /path/to/state.json --init
: # after that you can just run it:
: ./orger_module.py --to /path/to/output.org --state /path/to/state.json
: ./orger_module.py --to /path/to/output.org

This will keep the state file in your user config dir (e.g. =~/.config/orger/=).

Alternatively, you can pass the state file explicitly:

: ./orger_module.py --to /path/to/output.org --state /path/to/state.json

* FAQ
- Why are the files output by some modules read only?
Expand Down
19 changes: 15 additions & 4 deletions modules/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
from orger import Mirror
from orger.inorganic import node, link
from orger.common import dt_heading, error
from orger import pandoc

import my.coding.github as gh
# todo use later: import my.github.ghexport as gh. also careful about using events() -- need to sort?
# I guess makes sense to generally expose get_ methods?


class Github(Mirror):
Expand All @@ -13,14 +16,22 @@ def get_items(self) -> Mirror.Results:
yield error(e)
continue
# TODO filter only events that have body? e.g. not sure if much point emitting pull requests here
summary = e.summary
body = e.body
if body is None:
lines = summary.splitlines(keepends=True)
if len(lines) > 1:
summary = lines[0].strip()
body = ''.join(lines[1:]) # todo meh. hacky, better to extract bodies in the provider properly
if body.strip() == '':
body = None

yield node(
dt_heading(
e.dt,
link(url=e.link, title=e.summary) if e.link is not None else e.summary
link(url=e.link, title=summary) if e.link is not None else summary
),
# TODO would be nice to convert from markdown to org here
# TODO use pandoc thingie? make it configurable too
body=e.body,
body=None if body is None else pandoc.to_org(body, from_='gfm'), # github flavored markdown
)


Expand Down
58 changes: 14 additions & 44 deletions modules/polar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,23 @@
"""


from orger import StaticView
from orger.inorganic import node, link
from orger import Mirror
from orger.inorganic import node, link, OrgNode
from orger.common import dt_heading
from orger import pandoc

class PolarView(StaticView):

class PolarView(Mirror):
def get_items(self):
from my.reading import polar

def make_comment(c: polar.Comment) -> OrgNode:
text = pandoc.to_org(data=c.text, from_='html', logger=self.logger)
return node(
heading=dt_heading(c.created, text.splitlines()[0]),
body=text,
)

def make_item(res: polar.Result):
if isinstance(res, polar.Error):
# TODO could create error heading from exception automatically? take first line as heading and rest + traceback as the body
Expand All @@ -39,10 +49,7 @@ def make_item(res: polar.Result):
heading=dt_heading(hl.created, hl.selection),
tags=hl.tags,
properties=None if hl.color is None else {'POLAR_COLOR': hex2name(hl.color)},
children=[node(
heading=dt_heading(c.created, c.text.splitlines()[0]),
body=html2org(c.text, logger=self.logger),
) for c in hl.comments]
children=[make_comment(c) for c in hl.comments],
) for hl in book.items]
)
for res in polar.get_entries():
Expand All @@ -61,43 +68,6 @@ def hex2name(hexc: str) -> str:
)


# TODO move to base?
def html2org(html: str, logger) -> str:
# meh. for some reason they are converted to \\ otherwise
html = html.replace('<br>', '')


from subprocess import run, PIPE
try:
r = run(
['pandoc', '-f', 'html', '-t', 'org', '--wrap=none'],
check=True,
input=html.encode('utf8'),
stdout=PIPE,
)
except FileNotFoundError as fe:
import warnings
warnings.warn("Please install 'pandoc' to convert HTML to org-mode. See https://pandoc.org/installing.html")
except Exception as e:
logger.exception(e)
else:
return r.stdout.decode('utf8')
return html # fallback


# TODO decode text incoming from polar?

def test_html2org():
import logging
# html = "<p>and a <i>comment</i> too&nbsp;</p><p><br></p><p><b>multiline</b>!</p>"
# TODO ok, it's annoying... not sure what to do with nonpritable crap
html = "<p>and a <i>comment</i> too</p><p><br></p><p><b>multiline</b>!</p>"
assert html2org(html, logger=logging) == r'''
and a /comment/ too
*multiline*!
'''.lstrip()


if __name__ == '__main__':
PolarView.main()
Expand Down
6 changes: 4 additions & 2 deletions modules/reddit.py → modules/reddit2org.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
"""
Better interface for reading saved reddit posts/comments
"""
from orger import InteractiveView
from orger import Mirror
from orger.inorganic import node, link
from orger.common import dt_heading

from my.reddit import saved

class RedditView(InteractiveView):

class RedditView(Mirror):
def get_items(self):
for s in saved():
yield s.sid, node(
Expand All @@ -21,6 +22,7 @@ def get_items(self):
body=s.text,
)

# todo this could be generic, i.e. checking all urls?
def is_dead_url(self, url: str) -> bool:
assert self.cmdline_args is not None
if not self.cmdline_args.mark_dead:
Expand Down
20 changes: 4 additions & 16 deletions modules/roamresearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,14 @@
from itertools import chain
from typing import Iterable

from orger import StaticView
from orger import Mirror
from orger.inorganic import node, link, OrgNode
from orger.common import dt_heading
from orger import pandoc

import my.roamresearch as roamresearch


from subprocess import run, PIPE

def md2org(text: str) -> str:
# TODO use batch?? or talk to a process
r = run(
['pandoc', '-f', 'markdown', '-t', 'org', '--wrap=none'],
check=True,
input=text.encode('utf8'),
stdout=PIPE,
)
return r.stdout.decode('utf8')


# todo ^^ ^^ things are highlight?
def roam_text_to_org(text: str) -> str:
"""
Expand All @@ -31,7 +19,7 @@ def roam_text_to_org(text: str) -> str:
('{{[[slider]]}}', ''),
]:
text = text.replace(f, t)
org = md2org(text)
org = pandoc.to_org(text, from_='markdown')
org = org.replace(r'\_', '_') # unescape, it's a bit aggressive..
return org

Expand Down Expand Up @@ -87,7 +75,7 @@ def roam_note_to_org(node: roamresearch.Node, top=False) -> Iterable[OrgNode]:
)


class RoamView(StaticView):
class RoamView(Mirror):
def get_items(self):
rr = roamresearch.roam()
from concurrent.futures import ThreadPoolExecutor
Expand Down
9 changes: 5 additions & 4 deletions modules/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,26 @@
from orger.inorganic import node, link
from orger.common import dt_heading

from my.media.youtube import get_watched
from my.media.youtube import watched

from itertools import groupby


class YoutubeView(Mirror):
def get_items(self) -> Mirror.Results:
watched = get_watched()
by_url = lambda w: w.url
by_when = lambda w: w.when
items = [
max(group, key=by_when)
for _, group in groupby(sorted(watched, key=by_url), key=by_url)
for _, group in groupby(sorted(watched(), key=by_url), key=by_url)
]
items = sorted(items, key=by_when)
# TODO for each url only take latest?
for item in items:
deleted = item.url == item.title # todo move to HPI?
l = link(title=item.title + (' (DELETED)' if deleted else ''), url=item.url)
yield (item.url, node(
heading=dt_heading(item.when, link(title=item.title, url=item.url)),
heading=dt_heading(item.when, l),
))


Expand Down
3 changes: 2 additions & 1 deletion scripts/ci/run
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ if ! [ -z "$CI" ]; then
fi

# vim is used in one of the tests
command -v vim || sudo apt install vim
command -v vim || sudo apt install vim
command -v pandoc || sudo apt install pandoc

pip3 install --user tox
tox
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ def main():
author_email='[email protected]',
description='Converts data into org-mode',

install_requires=['atomicwrites'],
install_requires=[
'appdirs' , # to keep state files
'atomicwrites', # to safely append data to a file
],
extras_require={
'testing': ['pytest'],
'linting': [
Expand Down
7 changes: 7 additions & 0 deletions src/orger/common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from datetime import datetime
from typing import Optional
from pathlib import Path

from .inorganic import OrgNode, timestamp, timestamp_with_style, TimestampStyle


class settings:
DEFAULT_TIMESTAMP_STYLE = TimestampStyle.INACTIVE
USE_PANDOC: bool = True


def dt_heading(dt: Optional[datetime], heading: str) -> str:
Expand Down Expand Up @@ -44,3 +46,8 @@ def todo(dt: datetime, **kwargs):

# todo use klogging2?
from .klogging import LazyLogger, setup_logger


def orger_user_dir() -> Path:
import appdirs # type: ignore[import]
return Path(appdirs.user_config_dir('orger'))
39 changes: 29 additions & 10 deletions src/orger/org_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .inorganic import OrgNode, TimestampStyle
from .state import JsonState
from .atomic_append import PathIsh, atomic_append_check, assert_not_edited
from .common import setup_logger
from .common import setup_logger, orger_user_dir

# TODO tests for determinism? not sure where should they be...
# think of some generic thing to test that?
Expand Down Expand Up @@ -61,10 +61,20 @@ def main_common(self) -> None:
settings.DEFAULT_TIMESTAMP_STYLE = _style_map[timestamp_style]
setup_logger(self.logger, level=logging.DEBUG)

pandoc = self.args.pandoc
settings.USE_PANDOC = pandoc

@classmethod
def parser(cls) -> ArgumentParser:
p = ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
p = argparse.ArgumentParser(formatter_class=F) # type: ignore

p.add_argument(
'--disable-pandoc',
action='store_false',
dest='pandoc',
help='Pass to disable pandoc conversions to org-mode (it might be slow in some cases)',
)
p.add_argument(
'--timestamps',
type=str,
Expand Down Expand Up @@ -93,7 +103,7 @@ class Mirror(OrgView):
@classmethod
def main(cls, setup_parser=None) -> None:
p = cls.parser()
p.add_argument('--to', type=Path, default=Path(cls.name() + '.org'))
p.add_argument('--to', type=Path, default=Path(cls.name() + '.org'), help='Filename to output')
if setup_parser is not None:
setup_parser(p)

Expand Down Expand Up @@ -163,9 +173,9 @@ def test():

class Queue(OrgView):
"""
*Queue* (old name =InteractiveView=): works as a queue, *only previously unseen items* from the data source are appended to the output org-mode file.
*Queue* (old name =InteractiveView=): works as a queue, *only previously unseen items* from the data source are added to the output org-mode file.
To keep track of old/new items, it's using a separate JSON =state= file.
To keep track of previously seen items, it's using a separate JSON =state= file.
A typical usecase is a todo list, or a content processing queue.
You can use such a module as you use any other org-mode file: schedule/refile/comment/set priorities, etc.
Expand All @@ -183,8 +193,16 @@ def _run(
dry_run: bool=False,
) -> None:
if not to.exists() and not init:
raise RuntimeError(f"target {to} doesn't exist! Try running with --init")
err = RuntimeError(f"{to} doesn't exist! Try running with --init")
import sys
if sys.stdin.isatty():
resp = input(f"{to} doesn't exist. Create empty file? y/n ").strip().lower()
if resp != 'y':
raise err
else:
raise err

state_path.parent.mkdir(parents=True, exist_ok=True) # not sure...
state = JsonState(
path=state_path,
logger=self.logger,
Expand Down Expand Up @@ -221,11 +239,12 @@ def get_items(self) -> Iterable[OrgWithKey]:

@classmethod
def main(cls, setup_parser=None) -> None:
default_state = orger_user_dir() / 'states' / (cls.name() + '.state.json')
p = cls.parser()
p.add_argument('--to' , type=Path, default=Path(cls.name() + '.org') , help='file where new items are appended')
p.add_argument('--state', type=Path, default=Path(cls.name() + '.state.json'), help='state file for keeping track of handled items')
p.add_argument('--init', action='store_true')
p.add_argument('--dry-run', action='store_true')
p.add_argument('--to' , type=Path, default=Path(cls.name() + '.org') , help='file where new items are added')
p.add_argument('--state', type=Path, default=default_state, help='state file for keeping track of handled items')
p.add_argument('--init', action='store_true') # todo not sure if I really need it?
p.add_argument('--dry-run', action='store_true', help='Run without modifying the state file')
if setup_parser is not None:
setup_parser(p)

Expand Down
Loading

0 comments on commit c021cf0

Please sign in to comment.