Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added non-git source puller functionality #194

Open
wants to merge 45 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ea87f2b
Command-line argument repo_dir is changed
sean-morris Jun 24, 2021
10385bb
Added non-git source puller functionality
sean-morris Jun 24, 2021
ab80daf
Added async functionality to non-git archives
sean-morris Aug 11, 2021
71ca2f4
Update nbgitpuller/plugin_helper.py
sean-morris Nov 3, 2021
ae66e53
Update nbgitpuller/hookspecs.py
sean-morris Nov 3, 2021
8934f5f
renamed and simplified the test_files
sean-morris Nov 4, 2021
ac2072c
added README to plugins
sean-morris Nov 4, 2021
a84096d
added docstring to progress_loop function
sean-morris Nov 4, 2021
86fd7bf
Update tests/test_download_puller.py
sean-morris Nov 4, 2021
c686651
Update tests/test_download_puller.py
sean-morris Nov 4, 2021
f8e04f1
Removed Downloader Plugins from Repo
sean-morris Nov 6, 2021
958b0b1
Added Custom Exception for Bad Provider
sean-morris Nov 6, 2021
2048e8d
Merge branch 'main' of https://github.com/jupyterhub/nbgitpuller
sean-morris Nov 8, 2021
398a03f
merged from master and fixed conflicts
sean-morris Nov 8, 2021
9a8fcab
Removed unused import from test file
sean-morris Nov 8, 2021
78e31c3
Added packages to dev-requirements.txt
sean-morris Nov 8, 2021
a131b93
Moved the two constants and REPO_PARENT_DIR out of __init__.py
sean-morris Nov 10, 2021
55da5e1
Revert some trivial formatting changes
consideRatio Nov 17, 2021
0ca6cf9
Apply suggestions from code review
sean-morris Nov 17, 2021
9e808e5
Changes from code review
sean-morris Nov 17, 2021
8d63ee4
Apply suggestions from code review
sean-morris Nov 19, 2021
deecc7b
Removed setTerminalVisibility from automatically opening in UI
sean-morris Nov 23, 2021
a9e08c4
Reverted a mistaken change to command-line args
sean-morris Nov 23, 2021
09c9249
Hookspecs renamed and documented
sean-morris Nov 23, 2021
0085fab
Hookspecs name and separate helper_args
sean-morris Nov 23, 2021
88ec806
Renamed for clarity
sean-morris Nov 24, 2021
8592d1f
Separated actual query_line_args from helper_args
sean-morris Nov 24, 2021
21d8f0f
fixed conflicts
sean-morris Nov 24, 2021
ab5dd10
Fixed tests
sean-morris Nov 24, 2021
e8ae5ca
Removed changes not meant to be merged
sean-morris Nov 26, 2021
56ad1ee
Apply suggestions from code review
sean-morris Nov 29, 2021
af567ca
Refactored docstrings
sean-morris Nov 29, 2021
782a35b
Refactored docstrings
sean-morris Nov 29, 2021
d034d37
Merge branch 'non-git' of https://github.com/sean-morris/nbgitpuller …
sean-morris Nov 29, 2021
9729464
Fix temp download dir to use the package tempfile
sean-morris Nov 30, 2021
602ef01
provider is now contentProvider in the html/js/query parameters
sean-morris Nov 30, 2021
3ebdc7e
The download_func and download_func_params brought in separately
sean-morris Nov 30, 2021
e22d076
Moved the handle_files_helper in Class
sean-morris Dec 1, 2021
3b14405
Moved downloader-plugin util to own repo
sean-morris Dec 20, 2021
613f863
Moved downloader-plugin util to own repo
sean-morris Dec 20, 2021
5f39c68
Merge branch 'non-git' of https://github.com/sean-morris/nbgitpuller …
sean-morris Dec 20, 2021
f618560
Removed nested_asyncio from init.py
sean-morris Jan 11, 2022
367f3c7
Moved downloader-plugin handling to puller thread
sean-morris Jan 15, 2022
8893970
Moved downloader plugins handling to pull.py
sean-morris Jan 19, 2022
7590c38
Access downloader-plugin results from plugin instance variable
sean-morris Jan 19, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include *.md
include LICENSE
include setup.cfg
recursive-include nbgitpuller/plugins *
sean-morris marked this conversation as resolved.
Show resolved Hide resolved
recursive-include nbgitpuller/static *
recursive-include nbgitpuller/templates *
38 changes: 34 additions & 4 deletions nbgitpuller/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

from .pull import GitPuller
from .version import __version__
from .hookspecs import handle_files
from .plugins.zip_puller import ZipSourceGoogleDriveDownloader
from .plugins.zip_puller import ZipSourceDropBoxDownloader
from .plugins.zip_puller import ZipSourceWebDownloader
import pluggy


class SyncHandler(IPythonHandler):
Expand Down Expand Up @@ -38,6 +43,17 @@ def emit(self, data):
self.write('data: {}\n\n'.format(serialized_data))
yield self.flush()

def setup_plugins(self, repo):
    """
    Build a plugin manager holding the downloader that matches ``repo``.

    The downloader is chosen by looking for a provider's domain anywhere
    in ``repo``: Google Drive first, then Dropbox; anything else is
    handled by the generic web downloader.

    :param str repo: url of the compressed source to download
    :return: plugin manager with the ``handle_files`` spec added and
        exactly one downloader registered
    """
    plugin_manager = pluggy.PluginManager("nbgitpuller")
    plugin_manager.add_hookspecs(handle_files)

    if "drive.google.com" in repo:
        downloader_cls = ZipSourceGoogleDriveDownloader
    elif "dropbox.com" in repo:
        downloader_cls = ZipSourceDropBoxDownloader
    else:
        downloader_cls = ZipSourceWebDownloader

    plugin_manager.register(downloader_cls())
    return plugin_manager

@web.authenticated
@gen.coroutine
def get(self):
Expand All @@ -53,6 +69,7 @@ def get(self):
try:
repo = self.get_argument('repo')
branch = self.get_argument('branch', None)
compressed = self.get_argument('compressed', "false")
depth = self.get_argument('depth', None)
if depth:
depth = int(depth)
Expand All @@ -73,6 +90,12 @@ def get(self):
self.set_header('content-type', 'text/event-stream')
self.set_header('cache-control', 'no-cache')

if compressed == 'true':
pm = self.setup_plugins(repo)
results = pm.hook.handle_files(repo=repo, repo_parent_dir=repo_parent_dir)[0]
repo_dir = repo_parent_dir + results["unzip_dir"]
repo = "file://" + results["origin_repo_path"]

gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])

q = Queue()
Expand Down Expand Up @@ -151,14 +174,15 @@ def get(self):
repo = self.get_argument('repo')
consideRatio marked this conversation as resolved.
Show resolved Hide resolved
branch = self.get_argument('branch', None)
depth = self.get_argument('depth', None)
compressed = self.get_argument('compressed', "false")
sean-morris marked this conversation as resolved.
Show resolved Hide resolved
urlPath = self.get_argument('urlpath', None) or \
self.get_argument('urlPath', None)
self.get_argument('urlPath', None)
consideRatio marked this conversation as resolved.
Show resolved Hide resolved
subPath = self.get_argument('subpath', None) or \
self.get_argument('subPath', '.')
self.get_argument('subPath', '.')
consideRatio marked this conversation as resolved.
Show resolved Hide resolved
app = self.get_argument('app', app_env)
parent_reldir = os.getenv('NBGITPULLER_PARENTPATH', '')
targetpath = self.get_argument('targetpath', None) or \
self.get_argument('targetPath', repo.split('/')[-1])
self.get_argument('targetPath', repo.split('/')[-1])
consideRatio marked this conversation as resolved.
Show resolved Hide resolved

if urlPath:
path = urlPath
Expand All @@ -174,7 +198,13 @@ def get(self):
self.write(
self.render_template(
'status.html',
repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
repo=repo,
branch=branch,
compressed=compressed,
path=path,
depth=depth,
targetpath=targetpath,
version=__version__
))
self.flush()

Expand Down
21 changes: 21 additions & 0 deletions nbgitpuller/hookspecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pluggy
sean-morris marked this conversation as resolved.
Show resolved Hide resolved

hookspec = pluggy.HookspecMarker("nbgitpuller")


@hookspec
def handle_files(self, repo, repo_parent_dir):
    """
    Hook specification for fetching a non-git source.

    An implementation downloads the non-git source files into the user
    directory and merges them into a local git repository. Once that
    local repository has been created (first run) or updated, git puller
    can treat the directory exactly as it treats sources that come from a
    real git repository.

    :param str repo: download url to source
    :param str repo_parent_dir: where we will store the downloaded repo
    :return: mapping with the two keys ``unzip_dir`` and
        ``origin_repo_path``
    :rtype: dict
    """
Empty file added nbgitpuller/plugins/__init__.py
Empty file.
116 changes: 116 additions & 0 deletions nbgitpuller/plugins/plugin_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import subprocess
import os
import logging
import requests
from requests_file import FileAdapter
sean-morris marked this conversation as resolved.
Show resolved Hide resolved
import shutil
import re


# for large files from Google Drive
def get_confirm_token(response):
    """
    Return the download-confirmation token carried in the response cookies.

    The token is the value of the first cookie whose name starts with
    ``download_warning``.

    :param response: object exposing a ``cookies`` mapping
    :return: the token value, or ``None`` when no such cookie is present
    """
    return next(
        (
            token
            for name, token in response.cookies.items()
            if name.startswith('download_warning')
        ),
        None,
    )


# sets up a local repo that acts like a remote
def initialize_local_repo(local_repo_path):
    """
    Create ``local_repo_path`` (if needed) and make it a bare git repository.

    :param str local_repo_path: directory that will hold the bare repository
    :raises subprocess.CalledProcessError: if ``git init --bare`` fails
    """
    logging.info(f"Creating local_repo_path: {local_repo_path}")
    os.makedirs(local_repo_path, exist_ok=True)

    init_cmd = ["git", "init", "--bare"]
    subprocess.check_output(init_cmd, cwd=local_repo_path)


# local repo cloned from the "remote" which is in user drive
def clone_local_origin_repo(origin_repo_path, temp_download_repo):
    """
    Clone the local "remote" repository into a working directory.

    :param str origin_repo_path: path of the local repository to clone,
        addressed through a ``file://`` url
    :param str temp_download_repo: directory to clone into; created when
        it does not exist yet
    :raises subprocess.CalledProcessError: if ``git clone`` fails
    """
    logging.info(f"Creating temp_download_repo: {temp_download_repo}")
    os.makedirs(temp_download_repo, exist_ok=True)

    origin_url = "file://{}".format(origin_repo_path)
    subprocess.check_output(
        ["git", "clone", origin_url, temp_download_repo],
        cwd=temp_download_repo,
    )


# this is needed to unarchive various formats(eg. zip, tgz, etc)
def determine_file_extension(url, response):
    """
    Work out the archive extension of a download so it can be unpacked.

    The sources below are tried in order; when one yields no extension
    the next one is consulted instead of failing:

    1. the file name(s) in the ``content-disposition`` header
    2. a ``content-type`` header that names a zip archive
    3. the last path segment of ``url`` (query and fragment removed)

    The extension is taken from the *end* of the file name, so
    ``notes.v2.zip`` yields ``zip``. Compound tar suffixes such as
    ``tar.gz`` are kept whole, and the result is lower-cased because
    archive formats are matched on the file name suffix.

    :param str url: address the archive was requested from
    :param response: object whose ``headers`` mapping offers ``get``
    :return: extension without the leading dot, e.g. ``zip`` or ``tar.gz``
    :rtype: str
    :raises Exception: if no extension can be determined
    """
    def extension_of(name):
        # Header values may be quoted and padded with whitespace.
        name = name.strip().strip('"').lower()
        for compound in ("tar.gz", "tar.bz2", "tar.xz"):
            if name.endswith("." + compound):
                return compound
        # splitext keeps only the final suffix: "a.b.zip" -> ".zip"
        return os.path.splitext(name)[1][1:]

    file_type = response.headers.get('content-type')
    content_disposition = response.headers.get('content-disposition')

    ext = ""
    if content_disposition:
        # matches both "filename=" and the extended "filename*=" form
        for fname in re.findall(r"filename\*?=([^;]+)", content_disposition):
            ext = extension_of(fname)
            if ext:
                break

    if not ext and file_type and "/zip" in file_type:
        ext = "zip"

    if not ext:
        # Drop fragment and query first: they may themselves contain "/".
        path = url.split("#", 1)[0].split("?", 1)[0]
        ext = extension_of(path.rsplit("/", 1)[-1])

    if not ext:
        m = f"Could not determine the file extension for unarchiving: {url}"
        raise Exception(m)
    return ext


# the downloaded content is in the response -- unarchive and save to the disk
def save_response_content(url, response, temp_download_repo):
    """
    Write the downloaded archive to disk and unpack it in place.

    The response body is streamed into ``download.<ext>`` inside
    ``temp_download_repo``, unpacked into that same directory, and the
    archive file is then removed, also when writing or unpacking fails.

    :param str url: address the archive came from; used to determine the
        archive extension
    :param response: streamed download offering ``headers`` and
        ``iter_content``
    :param str temp_download_repo: existing directory to unpack into
    :raises Exception: any failure, re-raised with a
        "Problem handling file download" message and the original error
        chained as its cause
    """
    CHUNK_SIZE = 32768
    try:
        ext = determine_file_extension(url, response)
        temp_download_file = os.path.join(temp_download_repo, f"download.{ext}")
        try:
            with open(temp_download_file, "wb") as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    # filter out keep-alive new chunks
                    if chunk:
                        f.write(chunk)

            # NOTE(review): the archive comes from a remote source. The
            # Python documentation warns against extracting untrusted
            # archives without inspection; consider validating member
            # paths (or the ``filter`` argument on newer Pythons).
            shutil.unpack_archive(temp_download_file, temp_download_repo)
        finally:
            # Never leave the raw archive next to the unpacked files;
            # otherwise a failed run would leave it in the work tree.
            if os.path.exists(temp_download_file):
                os.remove(temp_download_file)
    except Exception as e:
        m = f"Problem handling file download: {str(e)}"
        raise Exception(m) from e


# grab archive file from url
def fetch_files(url, id=-1):
    """
    Request the archive at ``url`` and return the streamed response.

    When the first response carries a download-confirmation cookie (see
    ``get_confirm_token``), the request is repeated with that token as
    the ``confirm`` query parameter and the second response is returned.

    :param str url: address to download from; ``file://`` urls are
        supported through the mounted ``FileAdapter``
    :param id: value sent as the ``id`` query parameter. The default
        ``-1`` means "no id": nothing is added to the url, so plain web
        and Dropbox links are requested exactly as given.
    :return: the response, opened with ``stream=True``; the caller is
        responsible for consuming it
    """
    session = requests.Session()
    session.mount('file://', FileAdapter())  # add adapter for pytests

    # Only Google Drive downloads are addressed by id; do not pollute
    # other urls with a meaningless "id=-1" parameter.
    params = {} if id == -1 else {'id': id}
    response = session.get(url, params=params, stream=True)

    token = get_confirm_token(response)
    if token:
        # The first streamed response is abandoned; release its connection.
        response.close()
        params = {**params, 'confirm': token}
        response = session.get(url, params=params, stream=True)

    return response


# this drives the file handling -- called from zip_puller by all the
# handle_files implementations for Google Drive, Dropbox, and standard
# web urls
def handle_files_helper(args):
    """
    Turn a downloaded archive into a commit on a local "origin" repository.

    Steps, all relative to ``args["repo_parent_dir"]``:

    1. remove any previous temporary download repository
    2. create the bare origin repository if it does not exist yet
    3. clone the origin into the temporary download repository
    4. unpack the downloaded archive into that clone
    5. add, commit and push everything back to the origin

    :param dict args: must contain the keys ``repo`` (source url),
        ``repo_parent_dir``, ``response`` (the streamed download),
        ``origin_dir`` and ``download_dir`` (directory names below
        ``repo_parent_dir``)
    :return: ``{"unzip_dir": <top-level entry of the working copy>,
        "origin_repo_path": <path of the origin repository>}``
    :rtype: dict
    :raises ValueError: on any failure; the error is logged and the
        original exception is chained as the cause
    """
    try:
        # os.path.join gives the same result as plain concatenation when
        # repo_parent_dir ends with a separator, and a correct path when
        # it does not.
        origin_repo = os.path.join(args["repo_parent_dir"], args["origin_dir"])
        temp_download_repo = os.path.join(args["repo_parent_dir"], args["download_dir"])
        if os.path.exists(temp_download_repo):
            shutil.rmtree(temp_download_repo)

        if not os.path.exists(origin_repo):
            initialize_local_repo(origin_repo)

        clone_local_origin_repo(origin_repo, temp_download_repo)
        save_response_content(args["repo"], args["response"], temp_download_repo)
        subprocess.check_output(["git", "add", "."], cwd=temp_download_repo)

        # The committer identity is passed per command so no git
        # configuration is required on the machine.
        # NOTE(review): the address was restored from an obfuscated
        # placeholder that was not a valid "key=value" -- confirm it.
        commit_cmd = [
            "git",
            "-c", "user.email=nbgitpuller@nbgitpuller.link",
            "-c", "user.name=nbgitpuller",
            "commit", "-m", "test", "--allow-empty",
        ]
        subprocess.check_output(commit_cmd, cwd=temp_download_repo)
        subprocess.check_output(["git", "push", "origin", "master"], cwd=temp_download_repo)

        # Skip only git's own metadata directory (an exact match keeps
        # names such as ".github"); sort so the choice is deterministic.
        dir_names = sorted(
            name for name in os.listdir(temp_download_repo) if name != ".git"
        )
        if not dir_names:
            raise ValueError("The downloaded archive did not contain any files")
        return {"unzip_dir": dir_names[0], "origin_repo_path": origin_repo}
    except Exception as e:
        logging.exception(e)
        raise ValueError(e) from e
79 changes: 79 additions & 0 deletions nbgitpuller/plugins/zip_puller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from .plugin_helper import fetch_files
from .plugin_helper import handle_files_helper
import pluggy

hookimpl = pluggy.HookimplMarker("nbgitpuller")
TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo"
sean-morris marked this conversation as resolved.
Show resolved Hide resolved
CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources"


# handles standard web addresses(not google drive or dropbox)
class ZipSourceWebDownloader:
    """Downloader plugin for standard web addresses."""

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download the archive at ``repo`` and hand it to the shared helper.

        :param str repo: publicly accessible url to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: mapping with the keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        return handle_files_helper({
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": fetch_files(repo),
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })


# handles downloads from google drive
class ZipSourceGoogleDriveDownloader(object):
    """Downloader plugin for Google Drive share links."""

    def __init__(self):
        # Endpoint the file is actually fetched from; the file itself is
        # selected with the ``id`` query parameter.
        self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"

    def get_id(self, repo):
        """
        Extract the Google Drive file id from a share link.

        Two link shapes are understood: a path segment following ``/d/``
        (``.../file/d/<id>/view``, ``.../d/<id>/edit``, ``.../d/<id>``)
        and an ``id`` query parameter (``...?id=<id>``).

        :param str repo: google drive share link
        :return: the file id
        :rtype: str
        :raises ValueError: if no id can be found in ``repo``
        """
        import re
        from urllib.parse import parse_qs, urlparse

        # Anchor on the whole "/d/" segment so an earlier "d/" elsewhere
        # in the url cannot be mistaken for the id marker.
        in_path = re.search(r"/d/([^/?#]+)", repo)
        if in_path:
            return in_path.group(1)

        in_query = parse_qs(urlparse(repo).query).get("id")
        if in_query:
            return in_query[0]

        raise ValueError(f"Could not find a Google Drive file id in: {repo}")

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download the shared archive and hand it to the shared helper.

        :param str repo: google drive share link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: mapping with the keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
        args = {
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR
        }
        return handle_files_helper(args)


# handles downloads from DropBox
class ZipSourceDropBoxDownloader:
    """Downloader plugin for Dropbox links."""

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download the Dropbox archive and hand it to the shared helper.

        Every ``dl=0`` in the link is rewritten to ``dl=1`` before the
        request is made, and the rewritten link is what the helper
        receives as ``repo``.

        :param str repo: dropbox download link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: mapping with the keys ``unzip_dir`` and ``origin_repo_path``
        :rtype: dict
        """
        download_link = repo.replace("dl=0", "dl=1")  # download set to 1 for dropbox
        return handle_files_helper({
            "repo": download_link,
            "repo_parent_dir": repo_parent_dir,
            "response": fetch_files(download_link),
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR,
        })
7 changes: 6 additions & 1 deletion nbgitpuller/static/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ require([

Terminal.applyAddon(fit);

function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
function GitSync(baseUrl, repo, branch, depth, compressed, targetpath, path) {
// Class that talks to the API backend & emits events as appropriate
this.baseUrl = baseUrl;
this.repo = repo;
this.branch = branch;
this.depth = depth;
this.compressed = compressed;
this.targetpath = targetpath;
this.redirectUrl = baseUrl + path;

Expand Down Expand Up @@ -52,6 +53,9 @@ require([
if (typeof this.branch !== 'undefined' && this.branch != undefined) {
syncUrlParams['branch'] = this.branch;
}
if (typeof this.compressed !== 'undefined' && this.compressed != undefined) {
syncUrlParams['compressed'] = this.compressed;
}
var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);

this.eventSource = new EventSource(syncUrl);
Expand Down Expand Up @@ -133,6 +137,7 @@ require([
utils.get_body_data('repo'),
utils.get_body_data('branch'),
utils.get_body_data('depth'),
utils.get_body_data('compressed'),
utils.get_body_data('targetpath'),
utils.get_body_data('path')
);
Expand Down
1 change: 1 addition & 0 deletions nbgitpuller/templates/status.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
data-path="{{ path | urlencode }}"
{% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
{% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
{% if compressed %}data-compressed="{{ compressed | urlencode }}"{% endif %}
data-targetpath="{{ targetpath | urlencode }}"
{% endblock %}

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
packages=find_packages(),
include_package_data=True,
platforms='any',
install_requires=['notebook>=5.5.0', 'tornado'],
install_requires=['notebook>=5.5.0', 'tornado', 'requests', 'requests-file'],
data_files=[
('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/nbgitpuller.json'])
],
Expand Down
Loading