-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added non-git source puller functionality
Handles non-git source compressed archives from google drive, dropbox, and any publicly available web address.
- Loading branch information
1 parent
1e57904
commit 6d76554
Showing
11 changed files
with
1,721 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
include *.md | ||
include LICENSE | ||
include setup.cfg | ||
recursive-include nbgitpuller/plugins * | ||
recursive-include nbgitpuller/static * | ||
recursive-include nbgitpuller/templates * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pluggy | ||
|
||
# Marker used to declare nbgitpuller hook specifications for pluggy | ||
hookspec = pluggy.HookspecMarker("nbgitpuller") | ||
|
||
|
||
@hookspec
def handle_files(self, repo, repo_parent_dir):
    """
    Hook specification for pulling a non-git source archive.

    An implementation downloads the non-git source files found at
    ``repo`` into the user directory and merges them into a local git
    repository. Once that local repository has been created (first
    time) or updated, git puller can handle the directory as it would
    sources coming from a git repository.

    :param str repo: download url to source
    :param str repo_parent_dir: where we will store the downloaded repo
    :return: two parameter json, unzip_dir and origin_repo_path
    :rtype: json object
    """
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import subprocess | ||
import os | ||
import stat | ||
import logging | ||
import requests | ||
from requests_file import FileAdapter | ||
import shutil | ||
import re | ||
|
||
|
||
# for large files from Google Drive | ||
def get_confirm_token(response):
    """
    Return the download-confirmation token carried in the response cookies.

    The value of the first cookie whose name starts with
    ``download_warning`` is returned (used for large Google Drive files).

    :param response: object exposing a ``cookies`` mapping
    :return: the cookie value, or None when no such cookie is present
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)
|
||
|
||
# sets up a local repo that acts like a remote | ||
def initialize_local_repo(local_repo_path):
    """
    Create ``local_repo_path`` (if needed) and run ``git init --bare`` in it.

    :param str local_repo_path: directory that will hold the bare repository
    :raises subprocess.CalledProcessError: if the git command fails
    """
    logging.info(f"Creating local_repo_path: {local_repo_path}")
    os.makedirs(local_repo_path, exist_ok=True)

    init_cmd = ["git", "init", "--bare"]
    subprocess.check_output(init_cmd, cwd=local_repo_path)
|
||
|
||
# local repo cloned from the "remote" which is in user drive | ||
def clone_local_origin_repo(origin_repo_path, temp_download_repo):
    """
    Clone the local origin repository into ``temp_download_repo``.

    The target directory is created first; the clone is made over a
    ``file://`` URL pointing at ``origin_repo_path``.

    :param str origin_repo_path: path of the repository acting as the remote
    :param str temp_download_repo: directory that receives the clone
    :raises subprocess.CalledProcessError: if the git command fails
    """
    logging.info(f"Creating temp_download_repo: {temp_download_repo}")
    os.makedirs(temp_download_repo, exist_ok=True)

    origin_url = f"file://{origin_repo_path}"
    subprocess.check_output(
        ["git", "clone", origin_url, temp_download_repo],
        cwd=temp_download_repo,
    )
|
||
|
||
# this is needed to unarchive various formats(eg. zip, tgz, etc) | ||
def determine_file_extension(url, response):
    """
    Work out the archive extension of a download so it can be unarchived.

    Sources are tried in order until one yields an extension:

    1. the ``filename`` (or ``filename*``) of the Content-Disposition header,
    2. a Content-Type header containing ``/zip``,
    3. the last path segment of ``url`` (query string and fragment removed).

    :param str url: the url the archive was requested from
    :param response: object exposing a ``headers`` mapping
    :return: lower-cased extension without the leading dot (e.g. ``zip``,
        ``tgz``, ``tar.gz``)
    :rtype: str
    :raises Exception: if no extension can be determined
    """
    # Compound extensions must be kept whole; the last suffix alone ("gz")
    # does not identify a tar archive.
    compound_exts = ("tar.gz", "tar.bz2", "tar.xz")

    def extension_of(name):
        # Returns the extension of a file name, or None if it has none.
        name = name.strip().strip('"\'').lower()
        for compound in compound_exts:
            if name.endswith("." + compound):
                return compound
        _, dot, last = name.rpartition(".")
        return last if dot and last else None

    headers = response.headers
    content_disposition = headers.get('content-disposition')
    file_type = headers.get('content-type')

    ext = None
    if content_disposition:
        # The header may carry no filename at all (e.g. just "attachment").
        names = re.findall(r"filename\*?=([^;]+)", content_disposition)
        if names:
            ext = extension_of(names[0])

    if not ext and file_type and "/zip" in file_type:
        ext = "zip"

    if not ext:
        last_segment = url.split("/")[-1]
        last_segment = last_segment.split("?", 1)[0].split("#", 1)[0]
        ext = extension_of(last_segment)

    if not ext:
        m = f"Could not determine the file extension for unarchiving: {url}"
        raise Exception(m)
    return ext
|
||
|
||
# the downloaded content is in the response -- unarchive and save to the disk | ||
def save_response_content(url, response, temp_download_repo):
    """
    Write the downloaded archive to disk and unpack it in place.

    The response body is streamed into a temporary file inside
    ``temp_download_repo``, unpacked into that same directory, and the
    temporary file is then removed (also when unpacking fails).

    :param str url: the url the archive was requested from
    :param response: streamed response exposing ``headers`` and
        ``iter_content``
    :param str temp_download_repo: directory receiving the unpacked files
    :raises Exception: wrapping any failure, with the message prefixed by
        "Problem handling file download: "
    """
    chunk_size = 32768
    try:
        ext = determine_file_extension(url, response)
        # shutil.unpack_archive infers the archive format from the file
        # name, so the temporary file must carry the real extension.
        temp_download_file = os.path.join(temp_download_repo, f"download.{ext}")
        try:
            with open(temp_download_file, "wb") as f:
                for chunk in response.iter_content(chunk_size):
                    # filter out keep-alive new chunks
                    if chunk:
                        f.write(chunk)

            # NOTE(review): the archive comes from an arbitrary url; members
            # with absolute or ".." paths may be written outside the target
            # directory -- consider validating members before unpacking.
            shutil.unpack_archive(temp_download_file, temp_download_repo)
        finally:
            # Never leave the raw archive behind; it would otherwise be
            # committed along with the unpacked files.
            if os.path.exists(temp_download_file):
                os.remove(temp_download_file)
    except Exception as e:
        m = f"Problem handling file download: {str(e)}"
        raise Exception(m) from e
|
||
|
||
# grab archive file from url | ||
def fetch_files(url, id=-1):
    """
    Request the archive at ``url`` and return the streamed response.

    When the first response carries a download-confirmation cookie (see
    ``get_confirm_token``), the request is repeated with the token added
    as the ``confirm`` query parameter.

    :param str url: address of the archive (``file://`` urls are supported
        through the mounted FileAdapter)
    :param id: file id sent as the ``id`` query parameter; the default
        ``-1`` means "no id" and sends no such parameter
    :return: the streamed response holding the archive content
    """
    session = requests.Session()
    session.mount('file://', FileAdapter())  # add adapter for pytests

    # Only send an id when the caller supplied one; appending "id=-1" to
    # arbitrary urls can invalidate signed download links.
    params = {} if id == -1 else {'id': id}
    response = session.get(url, params=params, stream=True)

    token = get_confirm_token(response)
    if token:
        # Release the connection of the superseded streamed response.
        response.close()
        params = dict(params, confirm=token)
        response = session.get(url, params=params, stream=True)

    return response
|
||
|
||
# this drives the file handling -- called from zip_puller by all the | ||
# handle_files implementations for GoogleDrive, Dropbox, and standard | ||
# Web url | ||
def handle_files_helper(args):
    """
    Download-side driver shared by all ``handle_files`` implementations.

    Steps: remove any previous temporary clone, create the bare origin
    repository if it does not exist yet, clone it, unpack the downloaded
    archive into the clone, then commit and push the result to the origin.

    :param dict args: must provide
        ``repo`` (source url), ``response`` (streamed download response),
        ``repo_parent_dir`` (base path), ``origin_dir`` and ``download_dir``
        (names appended directly to ``repo_parent_dir``)
    :return: dict with ``unzip_dir`` (name of the unpacked top-level entry)
        and ``origin_repo_path`` (path of the origin repository)
    :rtype: dict
    :raises ValueError: wrapping any failure in the steps above
    """
    try:
        # NOTE(review): plain concatenation -- presumably repo_parent_dir is
        # expected to end with a path separator; verify against the caller.
        origin_repo = args["repo_parent_dir"] + args["origin_dir"]
        temp_download_repo = args["repo_parent_dir"] + args["download_dir"]
        if os.path.exists(temp_download_repo):
            shutil.rmtree(temp_download_repo)

        if not os.path.exists(origin_repo):
            initialize_local_repo(origin_repo)

        clone_local_origin_repo(origin_repo, temp_download_repo)
        save_response_content(args["repo"], args["response"], temp_download_repo)
        subprocess.check_output(["git", "add", "."], cwd=temp_download_repo)
        # The committer identity is passed inline as key=value pairs so the
        # commit succeeds without any global git configuration.
        subprocess.check_output(
            [
                "git",
                "-c", "user.email=nbgitpuller@nbgitpuller.link",
                "-c", "user.name=nbgitpuller",
                "commit", "-m", "test", "--allow-empty",
            ],
            cwd=temp_download_repo,
        )
        subprocess.check_output(["git", "push", "origin", "master"], cwd=temp_download_repo)

        # Skip only git's own metadata directory (exact match, so entries
        # such as ".gitignore" are kept) and sort for a stable result.
        entries = sorted(
            name for name in os.listdir(temp_download_repo) if name != ".git"
        )
        if not entries:
            raise ValueError("The downloaded archive did not contain any files")

        # Prefer a directory for unzip_dir; fall back to any entry when the
        # archive holds only top-level files.
        directories = [
            name for name in entries
            if os.path.isdir(os.path.join(temp_download_repo, name))
        ]
        unzip_dir = (directories or entries)[0]
        return {"unzip_dir": unzip_dir, "origin_repo_path": origin_repo}
    except Exception as e:
        logging.exception(e)
        raise ValueError(e) from e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from .plugin_helper import fetch_files | ||
from .plugin_helper import handle_files_helper | ||
import pluggy | ||
|
||
# Marker used to register nbgitpuller hook implementations with pluggy | ||
hookimpl = pluggy.HookimplMarker("nbgitpuller") | ||
# Name (appended to repo_parent_dir) of the temporary clone that receives the download | ||
TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo" | ||
# Name (appended to repo_parent_dir) of the local repo that acts as the origin | ||
CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources" | ||
|
||
|
||
# handles standard web addresses(not google drive or dropbox) | ||
class ZipSourceWebDownloader(object):
    """Plugin for archives at standard web addresses (not Google Drive or Dropbox)."""

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Fetch the archive at ``repo`` and hand it to the shared helper.

        :param str repo: publicly accessible url to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: two parameter json, unzip_dir and origin_repo_path
        :rtype: json object
        """
        helper_args = dict(
            repo=repo,
            repo_parent_dir=repo_parent_dir,
            response=fetch_files(repo),
            origin_dir=CACHED_ORIGIN_NON_GIT_REPO,
            download_dir=TEMP_DOWNLOAD_REPO_DIR,
        )
        return handle_files_helper(helper_args)
|
||
|
||
# handles downloads from google drive | ||
class ZipSourceGoogleDriveDownloader(object):
    """Plugin for archives shared through Google Drive."""

    def __init__(self):
        # Endpoint the file id is sent to in order to download the file.
        self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"

    def get_id(self, repo):
        """
        Extract the file id from a Google Drive share link.

        Both ``.../d/<id>/...`` links (whatever follows the id: ``view``,
        ``edit``, ``preview`` or nothing) and ``...?id=<id>`` links are
        recognised.

        :param str repo: google drive share link
        :return: the file id
        :rtype: str
        :raises ValueError: if no file id can be found in the link
        """
        import re  # local import: this module does not import re at the top

        found = re.search(r"/d/([^/?#]+)", repo) or re.search(r"[?&]id=([^&#]+)", repo)
        if found is None:
            raise ValueError(f"Could not find a Google Drive file id in: {repo}")
        return found.group(1)

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Download the shared file behind ``repo`` and hand it to the helper.

        :param str repo: google drive share link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: two parameter json, unzip_dir and origin_repo_path
        :rtype: json object
        """
        response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
        args = {
            "repo": repo,
            "repo_parent_dir": repo_parent_dir,
            "response": response,
            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
            "download_dir": TEMP_DOWNLOAD_REPO_DIR
        }
        return handle_files_helper(args)
|
||
|
||
# handles downloads from DropBox | ||
class ZipSourceDropBoxDownloader(object):
    """Plugin for archives shared through DropBox."""

    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        """
        Fetch the archive behind a dropbox link and hand it to the helper.

        :param str repo: dropbox download link to compressed source files
        :param str repo_parent_dir: where we will store the downloaded repo
        :return: two parameter json, unzip_dir and origin_repo_path
        :rtype: json object
        """
        # dropbox serves the file itself only when dl is set to 1
        download_link = repo.replace("dl=0", "dl=1")
        helper_args = dict(
            repo=download_link,
            repo_parent_dir=repo_parent_dir,
            response=fetch_files(download_link),
            origin_dir=CACHED_ORIGIN_NON_GIT_REPO,
            download_dir=TEMP_DOWNLOAD_REPO_DIR,
        )
        return handle_files_helper(helper_args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.