Skip to content

Commit

Permalink
Large update
Browse files Browse the repository at this point in the history
- Replaced the _internetarchive_ Internet Archive API with the _wayback_ Wayback Machine API. The online upload process is now solid and reliable
- Bookmark loop full log option
- Logs styled
- Log levels implemented
    - CLI
  • Loading branch information
alopezrivera committed Aug 6, 2021
1 parent 5906832 commit adb7dcd
Show file tree
Hide file tree
Showing 14 changed files with 215 additions and 131 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ jobs:
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Install pdoc3
run: |
pip install pdoc3
- name: Generate docs
run: pdoc --html anchorage --force --output-dir=docs
- name: Push new docs to anchorage-docs.github.io
Expand Down
6 changes: 0 additions & 6 deletions anchorage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
"""
.. include:: ./documentation.md
"""

from anchorage.anchor_infrs.infrastructure import init
from anchorage.bookmarks import bookmarks, path, load
from anchorage.anchor import anchor_locally, anchor_online
from anchorage.anchor_tools.local import add as add_local, server
from anchorage.anchor_tools.online import add as add_online
43 changes: 27 additions & 16 deletions anchorage/anchor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,55 @@
from anchorage.anchor_tools.online import add as add_online


def anchor_locally(collection,
archive="./anchor"):
def anchor_online(collection, overwrite,
loglevel=20):
"""
Archive all bookmarked pages in a local ArchiveBox archive.
Anchor all bookmarks in an online archive.
:param collection: Bookmark JSON file or __bookmark__ class instance.
:param archive: Full path of chosen archive. Defaults to "./anchor".
:param loglevel: 0 - Full log output
20 - Progress bar
50 - Suppress all output
"""

create_archive(os.path.abspath(archive))

if isinstance(collection, bookmarks):
err = collection.loop(lambda k, v: add_local(url=v["url"]),
err = collection.loop(lambda k, v: add_online(url=v["url"], overwrite=overwrite),
loglevel=loglevel,
pb_label="ARCHIVING",
suppress_output=True)
)
else:
err = bookmarks(collection).loop(lambda k, v: add_local(url=v["url"]),
err = bookmarks(collection).loop(lambda k, v: add_online(url=v["url"], overwrite=overwrite),
loglevel=loglevel,
pb_label="ARCHIVING",
suppress_output=True)
)

log_anchorage(err)


def anchor_online(collection, overwrite):
def anchor_locally(collection, archive="./anchor",
loglevel=20):
"""
Anchor all bookmarks in an online archive.
Archive all bookmarked pages in a local ArchiveBox archive.
:param collection: Bookmark JSON file or __bookmark__ class instance.
:param archive: Full path of chosen archive. Defaults to "./anchor".
:param loglevel: 0 - Full log output
20 - Progress bar
50 - Suppress all output
"""

create_archive(os.path.abspath(archive))

if isinstance(collection, bookmarks):
err = collection.loop(lambda k, v: add_online(url=v["url"], overwrite=overwrite),
err = collection.loop(lambda k, v: add_local(url=v["url"]),
loglevel=loglevel,
pb_label="ARCHIVING",
suppress_output=True)
)
else:
err = bookmarks(collection).loop(lambda k, v: add_online(url=v["url"], overwrite=overwrite),
err = bookmarks(collection).loop(lambda k, v: add_local(url=v["url"]),
loglevel=loglevel,
pb_label="ARCHIVING",
suppress_output=True)
)

log_anchorage(err)

Expand Down
19 changes: 9 additions & 10 deletions anchorage/anchor_tools/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import shutil
import webbrowser

from Alexandria.general.console import print_color
from alexandria.shell import print_color

from anchorage.anchor_utils.shell import shell, bckgr, raise_error, error, suppress_stdout
from anchorage.anchor_utils.aesthetic import smart_print_color


def archivebox(command_str):
Expand Down Expand Up @@ -110,10 +109,10 @@ def docker_check():
try:
raise_error('docker')
except error() as e:
smart_print_color(f"Error: attempt to run 'docker' command in your shell failed. "
f" Install Docker if you don't have it installed."
f" Check your Docker installation by running 'docker --help' in your shell.",
'red')
print_color(f"Error: attempt to run 'docker' command in your shell failed. "
f" Install Docker if you don't have it installed."
f" Check your Docker installation by running 'docker --help' in your shell.",
'red')
sys.exit()


Expand All @@ -126,8 +125,8 @@ def archivebox_check():
try:
raise_error('archivebox')
except error() as e:
smart_print_color(f"Error: attempt to run 'archivebox' command in your shell failed. "
f" Install ArchiveBox if you don't have it installed."
f" Check your ArchiveBox installation by running 'archivebox' in your shell.",
'red')
print_color(f"Error: attempt to run 'archivebox' command in your shell failed. "
f" Install ArchiveBox if you don't have it installed."
f" Check your ArchiveBox installation by running 'archivebox' in your shell.",
'red')
sys.exit()
53 changes: 33 additions & 20 deletions anchorage/anchor_tools/online.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,51 @@
from internetarchive import get_item
from wayback import WaybackClient
from archivenow import archivenow

from Alexandria.general.console import print_color
from alexandria.shell import print_color, suppress_stdout
from alexandria.shell.color import colors

from anchorage.anchor_utils.shell import shell
from anchorage.anchor_utils.aesthetic import str_log_info, str_log_error, str_log_success


def add(url, archives=None, overwrite=False):
def add(url, archive='ia', api_key=None, overwrite=False):
"""
Archive a website in one of the four archives supported by Archive Now (archivenow).
TODO: Recognize internetarchive upload error messages as failures.
:param url: URL of website to be archived.
:param archives: List or string of flags specifying in which archives to save the website.
Available flags:
- https://pypi.org/project/archivenow/
Example:
"all"
"--ia --is"
["--ia", "--is"]
:param archive: List or string specifying archives to which to save the website.
Available archives:
- 'all': All archives
- 'ia': Internet Archive (default)
- 'is': Archive.is
- 'mg': Megalodon.jp
- 'cc': Perma.cc
:param api_key: Perma.cc API key. Format:
{"cc_api_key":"$YOUR-Perma-cc-API-KEY"}
:param overwrite: Archive URL even if it's already present in the Internet Archive.
"""

def upload(url):
flags = ' '.join(archives) if archives is list else archives if not isinstance(archives, type(None)) else ""
try:
copy = shell(f"archivenow {flags} {url}")
return copy.stdout, copy.stderr
except:
print("Error archiving: ", end="")
print_color(url, "red")
if archive == 'cc':
with suppress_stdout():
archive_url = archivenow.push(url, archive, api_key)[0]
else:
with suppress_stdout():
archive_url = archivenow.push(url, archive)[0]
log = str_log_success(url + " -> -> -> " + archive_url)
return log
except BaseException as e:
print(str_log_error(url))
print_color(e, "red")

if get_item(url).exists:
try:
archive_latest = next(WaybackClient().search(url)) # Search for URL using the WaybackMachine API
if overwrite:
return upload(url)
else:
return "Bookmark already present in the Internet Archive"
else:
return str_log_info("SKIPPED", url + " => => => " + archive_latest[7])
except:
# If bookmark search yields "0" error (defined by Python _wayback_)
return upload(url)
91 changes: 62 additions & 29 deletions anchorage/anchor_utils/aesthetic.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,73 @@
import os
import sys
from pyfiglet import Figlet

from Alexandria.general.console import print_color
from alexandria.data_structs.string import join_set_distance
from alexandria.shell import print_color, str_log, str_color

from anchorage.anchor_utils.system import operating_system

def str_log_info(kind, msg):
    """
    Format an informational log entry via `str_log`.

    :param kind: Label of the log entry, forwarded to `str_log` unchanged.
    :param msg: Message text, forwarded to `str_log` unchanged.
    :return: Whatever `str_log` returns for the given label and message.
    """
    # Empty strings are passed for both message colors.
    message_style = {"msg_color": "", "msg_bg_color": ""}
    return str_log(kind, msg, **message_style)

def supports_color():
    """
    Tell whether the terminal attached to standard output supports color.

    Adapted from Django -
    https://github.com/django/django/blob/main/django/core/management/color.py

    :return: True when the platform is supported and standard output
             reports being a TTY; False otherwise.
    """
    platform_name = sys.platform

    # Unsupported platforms: Pocket PC, and win32 without ANSICON.
    if platform_name == 'Pocket PC':
        return False
    if platform_name == 'win32' and 'ANSICON' not in os.environ:
        return False

    # isatty is not always implemented (Django ticket #6223).
    return hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
def str_log_success(msg):
    """
    Format a "SUCCESS" log entry via `str_log`.

    :param msg: Message text of the entry.
    :return: Whatever `str_log` returns for a "SUCCESS" entry with a
             bright cyan message and a bright green label.
    """
    style = {
        "msg_color": "brightCyan",
        "msg_bg_color": "",
        "kind_color": "brightGreen",
        "kind_bg_color": "",
    }
    return str_log("SUCCESS", msg, **style)


def smart_print_color(text, color, **kwargs):
"""
Print in color in UNIX terminals, and use the regular `print` function
elsewhere.
def str_log_error(msg):
    """
    Format a "FAILURE" log entry via `str_log`.

    :param msg: Message text of the entry.
    :return: Whatever `str_log` returns for a "FAILURE" entry with a
             red message and a bright red label.
    """
    style = {
        "msg_color": "red",
        "msg_bg_color": "",
        "kind_color": "brightRed",
        "kind_bg_color": "",
    }
    return str_log("FAILURE", msg, **style)

:param text: Text to be printed to screen
:param color: Text color
:param kwargs: Any further keyword arguments for the `print` function
"""
if supports_color():
print_color(text, color, **kwargs)
else:
print(text, **kwargs)

def str_log_progress(fraction, t_elapsed, t_remaining,
                     c_fr="brightCyan", c_bg_fr="",
                     c_te="", c_bg_te="",
                     c_tr="brightCyan", c_bg_tr="",
                     l=11, m=15, n=20):
    """
    Build a colored progress line with three " :: "-separated segments:
    progress fraction, elapsed time, and remaining time.

    :param fraction: Text of the first segment.
                     NOTE(review): presumably a string such as a
                     done/total counter - confirm against the caller.
    :param t_elapsed: Elapsed time in seconds.
    :param t_remaining: Remaining time in seconds.
    :param c_fr: Color of the fraction segment.
    :param c_bg_fr: Background color of the fraction segment.
    :param c_te: Color of the elapsed-time segment.
    :param c_bg_te: Background color of the elapsed-time segment.
    :param c_tr: Color of the remaining-time segment.
    :param c_bg_tr: Background color of the remaining-time segment.
    :param l: Distance passed to `join_set_distance` for the first "::".
    :param m: Distance passed to `join_set_distance` for the elapsed time.
    :param n: Distance passed to `join_set_distance` for the second "::".
    :return: The three colored segments joined with " :: ".
    :raises ValueError: If the laid-out line does not split into exactly
                        three segments on " :: ".
    """
    # Times
    def s_to_hms(_n):
        """Render a number of seconds as 'H:M:S.ss' (no zero padding)."""
        hours, remainder = divmod(_n, 3600)
        minutes, seconds = divmod(remainder, 60)
        return f'{int(hours)}:{int(minutes)}:{seconds:.2f}'

    t_elapsed = s_to_hms(t_elapsed)
    t_remaining = "to go -> " + s_to_hms(t_remaining)

    # Set distances
    r = join_set_distance(fraction, "::", l)
    r = join_set_distance(r, t_elapsed, m)
    r = join_set_distance(r, " ::", n)
    r = join_set_distance(r, t_remaining, len(r)+2)

    str_t, str_kind, str_msg = r.split(" :: ")

    # Take care not to color padding whitespace.
    # It is assumed that all padding has been added by
    # <<join_set_distance>> and is thus placed at the end of each segment.
    def countstrip(s):
        """Return `s` without trailing whitespace, and how much was removed."""
        stripped = s.rstrip()
        # Measure against the stripped argument itself. The previous code
        # compared against the enclosing `str_kind`, giving an unrelated
        # (possibly negative) count.
        return stripped, len(s) - len(stripped)

    # Strip the padded segments (not the raw inputs), so the padding can be
    # re-appended outside the color codes.
    str_fraction, n_ws_fr = countstrip(str_t)
    str_te, n_ws_te = countstrip(str_kind)
    str_tr, n_ws_tr = countstrip(str_msg)

    # Color
    c_str_fr = str_color(str_fraction, c_fr, c_bg_fr) + " " * n_ws_fr
    c_str_te = str_color(str_te, c_te, c_bg_te) + " " * n_ws_te
    c_str_tr = str_color(str_tr, c_tr, c_bg_tr) + " " * n_ws_tr

    # Join in final string
    return " :: ".join([c_str_fr, c_str_te, c_str_tr])


def newline():
Expand All @@ -53,7 +86,7 @@ def title(text="Anchorage", font="big", color="cyan"):
:param color: Title color
"""
f = Figlet(font=font)
smart_print_color(f.renderText(text), color)
print_color(f.renderText(text), color)


class colors:
Expand Down
5 changes: 3 additions & 2 deletions anchorage/anchor_utils/regex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import sys
from anchorage.anchor_utils.aesthetic import smart_print_color

from alexandria.shell import print_color


def expr_check(expr):
Expand All @@ -13,5 +14,5 @@ def expr_check(expr):
try:
re.compile(expr)
except re.error:
smart_print_color("\n Incorrect regex formula (regex compile error)\n", "red")
print_color("\n Incorrect regex formula (regex compile error)\n", "red")
sys.exit()
Loading

0 comments on commit adb7dcd

Please sign in to comment.