From d02f6cd342a0dc09602338722f6851355e025f00 Mon Sep 17 00:00:00 2001
From: Sean Morris <sean.smorris@berkeley.edu>
Date: Wed, 23 Jun 2021 18:13:57 -0700
Subject: [PATCH] Added non-git source puller functionality

Handles compressed archives from non-git sources: Google Drive,
Dropbox, and any publicly available web address.
---
 MANIFEST.in                          |    1 +
 nbgitpuller/handlers.py              |   43 +-
 nbgitpuller/hookspecs.py             |   18 +
 nbgitpuller/plugins/__init__.py      |    0
 nbgitpuller/plugins/plugin_helper.py |  146 +++
 nbgitpuller/plugins/zip_puller.py    |   67 ++
 nbgitpuller/static/index.js          |    7 +-
 nbgitpuller/templates/status.html    |    1 +
 setup.py                             |    2 +-
 tests/test_files/hw/hw01/hw01.ipynb  | 1405 ++++++++++++++++++++++++++
 tests/test_zip_puller.py             |   55 +
 11 files changed, 1738 insertions(+), 7 deletions(-)
 create mode 100644 nbgitpuller/hookspecs.py
 create mode 100644 nbgitpuller/plugins/__init__.py
 create mode 100644 nbgitpuller/plugins/plugin_helper.py
 create mode 100644 nbgitpuller/plugins/zip_puller.py
 create mode 100644 tests/test_files/hw/hw01/hw01.ipynb
 create mode 100644 tests/test_zip_puller.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 607df237..0e8f8cc4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include *.md
 include LICENSE
 include setup.cfg
+recursive-include nbgitpuller/plugins *
 recursive-include nbgitpuller/static *
 recursive-include nbgitpuller/templates *
diff --git a/nbgitpuller/handlers.py b/nbgitpuller/handlers.py
index f83ad7d5..0e984dd3 100644
--- a/nbgitpuller/handlers.py
+++ b/nbgitpuller/handlers.py
@@ -11,6 +11,12 @@
 
 from .pull import GitPuller
 from .version import __version__
+from .hookspecs import NonGitSourceSpec
+from .plugins.zip_puller import ZipSourceGoogleDriveDownloader
+from .plugins.zip_puller import ZipSourceDropBoxDownloader
+from .plugins.zip_puller import ZipSourceWebDownloader
+import pluggy
+import distutils.util
 
 
 class SyncHandler(IPythonHandler):
@@ -38,6 +44,17 @@ def emit(self, data):
         self.write('data: {}\n\n'.format(serialized_data))
         yield self.flush()
 
+    def setup_plugins(self, repo):
+        pm = pluggy.PluginManager("nbgitpuller")
+        pm.add_hookspecs(NonGitSourceSpec)
+        if "drive.google.com" in repo:
+            pm.register(ZipSourceGoogleDriveDownloader())
+        elif "dropbox.com" in repo:
+            pm.register(ZipSourceDropBoxDownloader())
+        else:
+            pm.register(ZipSourceWebDownloader())
+        return pm
+
     @web.authenticated
     @gen.coroutine
     def get(self):
@@ -53,6 +70,7 @@ def get(self):
         try:
             repo = self.get_argument('repo')
             branch = self.get_argument('branch', None)
+            compressed = self.get_argument('compressed', "false")
             depth = self.get_argument('depth', None)
             if depth:
                 depth = int(depth)
@@ -73,6 +91,12 @@ def get(self):
             self.set_header('content-type', 'text/event-stream')
             self.set_header('cache-control', 'no-cache')
 
+            if distutils.util.strtobool(compressed):
+                pm = self.setup_plugins(repo)
+                results = pm.hook.handle_files(repo=repo, repo_parent_dir=repo_parent_dir)[0]
+                repo_dir = repo_parent_dir + results["unzip_dir"]
+                repo = "file://" + results["origin_repo_path"]
+
             gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])
 
             q = Queue()
@@ -151,16 +175,19 @@ def get(self):
         repo = self.get_argument('repo')
         branch = self.get_argument('branch', None)
         depth = self.get_argument('depth', None)
+        compressed = self.get_argument('compressed', "false")
         urlPath = self.get_argument('urlpath', None) or \
-                  self.get_argument('urlPath', None)
+            self.get_argument('urlPath', None)
         subPath = self.get_argument('subpath', None) or \
-                  self.get_argument('subPath', '.')
+            self.get_argument('subPath', '.')
         app = self.get_argument('app', app_env)
         parent_reldir = os.getenv('NBGITPULLER_PARENTPATH', '')
         targetpath = self.get_argument('targetpath', None) or \
-                     self.get_argument('targetPath', repo.split('/')[-1])
+            self.get_argument('targetPath', repo.split('/')[-1])
 
-        if urlPath:
+        if distutils.util.strtobool(compressed):
+            path = 'tree/'
+        elif urlPath:
             path = urlPath
         else:
             path = os.path.join(parent_reldir, targetpath, subPath)
@@ -174,7 +201,13 @@ def get(self):
         self.write(
             self.render_template(
                 'status.html',
-                repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
+                repo=repo,
+                branch=branch,
+                compressed=compressed,
+                path=path,
+                depth=depth,
+                targetpath=targetpath,
+                version=__version__
             ))
         self.flush()
 
diff --git a/nbgitpuller/hookspecs.py b/nbgitpuller/hookspecs.py
new file mode 100644
index 00000000..ba90c8e6
--- /dev/null
+++ b/nbgitpuller/hookspecs.py
@@ -0,0 +1,18 @@
+import pluggy
+
+hookspec = pluggy.HookspecMarker("nbgitpuller")
+
+
+class NonGitSourceSpec(object):
+    @hookspec
+    def handle_files(self, repo, repo_parent_dir):
+        """
+         This handles the downloading of non-git source
+         files into the user directory. Once downloaded,
+         the files are merged into a local git repository.
+
+         Once the local git repository is updated (or created
+         the first time), git puller can then handle this
+         directory as it would sources coming from a
+         git repository.
+        """
diff --git a/nbgitpuller/plugins/__init__.py b/nbgitpuller/plugins/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/nbgitpuller/plugins/plugin_helper.py b/nbgitpuller/plugins/plugin_helper.py
new file mode 100644
index 00000000..313f2bf6
--- /dev/null
+++ b/nbgitpuller/plugins/plugin_helper.py
@@ -0,0 +1,146 @@
+import subprocess
+import os
+import stat
+import logging
+import requests
+from requests_file import FileAdapter
+import shutil
+import re
+
+
+# for large files from Google Drive
+def get_confirm_token(response):
+    for key, value in response.cookies.items():
+        if key.startswith('download_warning'):
+            return value
+    return None
+
+
+# sets up a local bare repo that acts like a remote
+def initialize_local_repo(local_repo_path):
+    logging.info(f"Creating local_repo_path: {local_repo_path}")
+    try:
+        os.makedirs(local_repo_path)
+    except OSError:
+        logging.info(f"Directory exists: {local_repo_path}")
+
+    subprocess_helper("git init --bare", local_repo_path)
+
+    # Make our bare repository serveable over dumb HTTP
+    hook_path = os.path.join(local_repo_path, 'hooks', 'post-update')
+    os.rename(
+        os.path.join(local_repo_path, 'hooks', 'post-update.sample'),
+        hook_path
+    )
+    os.chmod(hook_path, os.stat(hook_path).st_mode | stat.S_IEXEC)
+
+
+# clones the local "remote" (which lives in the user drive) into a temporary download repo
+def clone_local_origin_repo(origin_repo_path, temp_download_repo):
+    logging.info(f"Creating temp_download_repo: {temp_download_repo}")
+    try:
+        os.makedirs(temp_download_repo)
+    except OSError:
+        logging.info(f"Directory exists: {temp_download_repo}")
+
+    cmd = f"git clone file://{origin_repo_path} {temp_download_repo}"
+    subprocess_helper(cmd, temp_download_repo)
+
+
+# the file extension is needed to unarchive various formats (e.g. zip, tgz, etc.)
+def determine_file_extension(url, response):
+    file_type = response.headers.get('content-type')
+    content_disposition = response.headers.get('content-disposition')
+    ext = None
+    if content_disposition:
+        fname = re.findall("filename\*?=([^;]+)", content_disposition)
+        fname = fname[0].strip().strip('"')
+        ext = fname.split(".")[1]
+    elif file_type and "/zip" in file_type:
+        ext = "zip"
+    else:
+        url = url.split("/")[-1]
+        if "?" in url:
+            url = url[0:url.find('?')]
+        if "." in url:
+            ext = url.split(".")[1]
+
+    if not ext:
+        m = f"Could not determine the file extension for unarchiving: {url}"
+        raise Exception(m)
+    return ext
+
+
+# the downloaded content is in the response -- save it to disk, unarchive it, then remove the archive
+def save_response_content(url, response, temp_download_repo):
+    try:
+        ext = determine_file_extension(url, response)
+        CHUNK_SIZE = 32768
+        temp_download_file = f"{temp_download_repo}/download.{ext}"
+        with open(temp_download_file, "wb") as f:
+            for chunk in response.iter_content(CHUNK_SIZE):
+                # filter out keep-alive new chunks
+                if chunk:
+                    f.write(chunk)
+
+        shutil.unpack_archive(temp_download_file, temp_download_repo)
+
+        os.remove(temp_download_file)
+    except Exception as e:
+        m = f"Problem handling file download: {str(e)}"
+        raise Exception(m)
+
+
+# grab archive file from url
+def fetch_files(url, id=-1):
+    session = requests.Session()
+    session.mount('file://', FileAdapter())  # add adapter for pytests
+    response = session.get(url, params={'id': id}, stream=True)
+    token = get_confirm_token(response)
+    if token:
+        params = {'id': id, 'confirm': token}
+        response = session.get(url, params=params, stream=True)
+
+    return response
+
+
+# this drives the file handling -- called from zip_puller by all the
+# handle_files implementations for Google Drive, Dropbox, and standard
+# web urls
+def handle_files_helper(args):
+    try:
+        origin_repo = args["repo_parent_dir"] + args["origin_dir"]
+        temp_download_repo = args["repo_parent_dir"] + args["download_dir"]
+        if os.path.exists(temp_download_repo):
+            shutil.rmtree(temp_download_repo)
+
+        if not os.path.exists(origin_repo):
+            initialize_local_repo(origin_repo)
+
+        clone_local_origin_repo(origin_repo, temp_download_repo)
+        save_response_content(args["repo"], args["response"], temp_download_repo)
+        subprocess_helper("git add .", temp_download_repo)
+        subprocess_helper("git -c user.email=nbgitpuller@nbgitpuller.link -c user.name=nbgitpuller commit -m test --allow-empty", temp_download_repo)
+        subprocess_helper("git push origin master", temp_download_repo)
+        unzipped_dirs = os.listdir(temp_download_repo)
+
+        dir_names = list(filter(lambda dir: ".git" not in dir, unzipped_dirs))
+        return {"unzip_dir": dir_names[0], "origin_repo_path": origin_repo}
+    except Exception as e:
+        logging.exception(e)
+        raise ValueError(e)
+
+
+# executes git commands for us
+def subprocess_helper(cmd, cwd):
+    try:
+        subprocess.run(
+            cmd.split(" "),
+            capture_output=True,
+            text=True,
+            check=True,
+            cwd=cwd
+        )
+    except Exception:
+        m = f"Problem executing git command: {cmd}"
+        raise Exception(m)
diff --git a/nbgitpuller/plugins/zip_puller.py b/nbgitpuller/plugins/zip_puller.py
new file mode 100644
index 00000000..85d59399
--- /dev/null
+++ b/nbgitpuller/plugins/zip_puller.py
@@ -0,0 +1,67 @@
+from .plugin_helper import fetch_files
+from .plugin_helper import handle_files_helper
+import pluggy
+
+hookimpl = pluggy.HookimplMarker("nbgitpuller")
+TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo"
+CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources"
+
+
+# handles standard web addresses (not Google Drive or Dropbox)
+class ZipSourceWebDownloader(object):
+    @hookimpl
+    # repo --> this is download url
+    # repo_parent_dir --> where we will create the repo
+    def handle_files(self, repo, repo_parent_dir):
+        response = fetch_files(repo)
+        args = {
+            "repo": repo,
+            "repo_parent_dir": repo_parent_dir,
+            "response": response,
+            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
+            "download_dir": TEMP_DOWNLOAD_REPO_DIR
+        }
+        return handle_files_helper(args)
+
+
+# handles downloads from google drive
+class ZipSourceGoogleDriveDownloader(object):
+    def __init__(self):
+        self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"
+
+    def get_id(self, repo):
+        start_id_index = repo.index("d/") + 2
+        end_id_index = repo.index("/view")
+        return repo[start_id_index:end_id_index]
+
+    # repo --> this is the Google Drive share url - the file id is parsed from it
+    # repo_parent_dir --> where we will create the repo
+    @hookimpl
+    def handle_files(self, repo, repo_parent_dir):
+        response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
+        args = {
+            "repo": repo,
+            "repo_parent_dir": repo_parent_dir,
+            "response": response,
+            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
+            "download_dir": TEMP_DOWNLOAD_REPO_DIR
+        }
+        return handle_files_helper(args)
+
+
+# handles downloads from DropBox
+class ZipSourceDropBoxDownloader(object):
+    # repo --> this is download url
+    # repo_parent_dir --> where we will create the repo
+    @hookimpl
+    def handle_files(self, repo, repo_parent_dir):
+        repo = repo.replace("dl=0", "dl=1")  # download set to 1 for dropbox
+        response = fetch_files(repo)
+        args = {
+            "repo": repo,
+            "repo_parent_dir": repo_parent_dir,
+            "response": response,
+            "origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
+            "download_dir": TEMP_DOWNLOAD_REPO_DIR
+        }
+        return handle_files_helper(args)
diff --git a/nbgitpuller/static/index.js b/nbgitpuller/static/index.js
index c85d5897..7be399e9 100644
--- a/nbgitpuller/static/index.js
+++ b/nbgitpuller/static/index.js
@@ -12,12 +12,13 @@ require([
 
     Terminal.applyAddon(fit);
 
-    function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
+    function GitSync(baseUrl, repo, branch, depth, compressed, targetpath, path) {
         // Class that talks to the API backend & emits events as appropriate
         this.baseUrl = baseUrl;
         this.repo = repo;
         this.branch = branch;
         this.depth = depth;
+        this.compressed = compressed;
         this.targetpath = targetpath;
         this.redirectUrl = baseUrl + path;
 
@@ -52,6 +53,9 @@ require([
         if (typeof this.branch !== 'undefined' && this.branch != undefined) {
             syncUrlParams['branch'] = this.branch;
         }
+        if (typeof this.compressed !== 'undefined' && this.compressed != undefined) {
+            syncUrlParams['compressed'] = this.compressed;
+        }
         var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);
 
         this.eventSource = new EventSource(syncUrl);
@@ -133,6 +137,7 @@ require([
         utils.get_body_data('repo'),
         utils.get_body_data('branch'),
         utils.get_body_data('depth'),
+        utils.get_body_data('compressed'),
         utils.get_body_data('targetpath'),
         utils.get_body_data('path')
     );
diff --git a/nbgitpuller/templates/status.html b/nbgitpuller/templates/status.html
index 1fcd00dc..99b9f53e 100644
--- a/nbgitpuller/templates/status.html
+++ b/nbgitpuller/templates/status.html
@@ -7,6 +7,7 @@
 data-path="{{ path | urlencode }}"
 {% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
 {% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
+{% if compressed %}data-compressed="{{ compressed | urlencode }}"{% endif %}
 data-targetpath="{{ targetpath | urlencode }}"
 {% endblock %}
 
diff --git a/setup.py b/setup.py
index 2afcea0f..3a367291 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@
     packages=find_packages(),
     include_package_data=True,
     platforms='any',
-    install_requires=['notebook>=5.5.0', 'tornado'],
+    install_requires=['notebook>=5.5.0', 'tornado', 'requests', 'requests-file'],
     data_files=[
         ('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/nbgitpuller.json'])
     ],
diff --git a/tests/test_files/hw/hw01/hw01.ipynb b/tests/test_files/hw/hw01/hw01.ipynb
new file mode 100644
index 00000000..960747ce
--- /dev/null
+++ b/tests/test_files/hw/hw01/hw01.ipynb
@@ -0,0 +1,1405 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "# Initialize Otter\n",
+    "import otter\n",
+    "grader = otter.Notebook(\"hw01.ipynb\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Homework 1: Causality and Expressions\n",
+    "\n",
+    "Please complete this notebook by filling in the cells provided."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Recommended Reading:**\n",
+    "- [What is Data Science](http://www.inferentialthinking.com/chapters/01/what-is-data-science.html)\n",
+    "- [Causality and Experiments](http://www.inferentialthinking.com/chapters/02/causality-and-experiments.html) \n",
+    "- [Programming in Python](http://www.inferentialthinking.com/chapters/03/programming-in-python.html)\n",
+    "\n",
+    "For all problems that you must write explanations and sentences for, you **must** provide your answer in the designated space. Moreover, throughout this homework and all future ones, please be sure to not re-assign variables throughout the notebook! For example, if you use `max_temperature` in your answer to one question, do not reassign it later on. Otherwise, you will fail tests that you thought you were passing previously!\n",
+    "\n",
+    "\n",
+    "Directly sharing answers is not okay, but discussing problems with the course staff or with other students is encouraged. Refer to the policies page to learn more about how to learn cooperatively.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Scary Arithmetic\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "An ad for ADT Security Systems says,\n",
+    "\n",
+    "> \"When you go on vacation, burglars go to work [...] According to FBI statistics, over 25% of home burglaries occur between Memorial Day and Labor Day.\"\n",
+    "\n",
+    "Do the data in the ad support the claim that burglars are more likely to go to work during the time between Memorial Day and Labor Day? Please explain your answer.\n",
+    "\n",
+    "**Note:** You can assume that \"over 25%\" means only slightly over. Had it been much over, say closer to 30%, then the marketers would have said so.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q1\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "\n",
+    "\n",
+    "## 2. Characters in Little Women\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In lecture, we counted the number of times that the literary characters were named in each chapter of the classic book, [*Little Women*](https://www.inferentialthinking.com/chapters/01/3/1/literary-characters). In computer science, the word \"character\" also refers to a letter, digit, space, or punctuation mark; any single element of a text. The following code generates a scatter plot in which each dot corresponds to a chapter of *Little Women*. The horizontal position of a dot measures the number of periods in the chapter. The vertical position measures the total number of characters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# This cell contains code that hasn't yet been covered in the course,\n",
+    "# but you should be able to interpret the scatter plot it generates.\n",
+    "\n",
+    "from datascience import *\n",
+    "from urllib.request import urlopen\n",
+    "import numpy as np\n",
+    "%matplotlib inline\n",
+    "\n",
+    "little_women_url = 'https://www.inferentialthinking.com/data/little_women.txt'\n",
+    "chapters = urlopen(little_women_url).read().decode().split('CHAPTER ')[1:]\n",
+    "text = Table().with_column('Chapters', chapters)\n",
+    "Table().with_columns(\n",
+    "    'Periods',    np.char.count(chapters, '.'),\n",
+    "    'Characters', text.apply(len, 0)\n",
+    "    ).scatter(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 1.** Around how many periods are there in the chapter with the most characters? Assign either 1, 2, 3, 4, or 5 to the name `characters_q1` below.\n",
+    "\n",
+    "1. 250\n",
+    "2. 390\n",
+    "3. 440\n",
+    "4. 32,000\n",
+    "5. 40,000\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q2_1\n",
+    "manual: false\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "characters_q1 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q2_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The test above checks that your answers are in the correct format. **This test does not check that you answered correctly**, only that you assigned a number successfully in each multiple-choice answer cell."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 2.** Which of the following chapters has the most characters per period? Assign either 1, 2, or 3 to the name `characters_q2` below.\n",
+    "1. The chapter with about 60 periods\n",
+    "2. The chapter with about 350 periods\n",
+    "3. The chapter with about 440 periods\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q2_2\n",
+    "manual: false\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "characters_q2 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q2_2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Again, the test above checks that your answers are in the correct format, but not that you have answered correctly."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To discover more interesting facts from this plot, read [Section 1.3.2](https://www.inferentialthinking.com/chapters/01/3/2/another-kind-of-character) of the textbook."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Names and Assignment Statements\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Question 1.** When you run the following cell, Python produces a cryptic error message."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "4 = 2 + 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "Choose the best explanation of what's wrong with the code, and then assign 1, 2, 3, or 4 to `names_q1` below to indicate your answer.\n",
+    "\n",
+    "1. Python is smart and already knows `4 = 2 + 2`.\n",
+    "\n",
+    "2. `4` is already a defined number, and it doesn't make sense to make a number be a name for something else. In Python, \"`x = 2 + 2`\" means \"assign `x` as the name for the value of `2 + 2`.\"\n",
+    "\n",
+    "3. It should be `2 + 2 = 4`.\n",
+    "\n",
+    "4. I don't get an error message. This is a trick question.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q3_1\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names_q1 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q3_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Question 2.** When you run the following cell, Python will produce another cryptic error message."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "two = 3\n",
+    "six = two plus two"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "Choose the best explanation of what's wrong with the code and assign 1, 2, 3, or 4 to `names_q2` below to indicate your answer.\n",
+    "\n",
+    "1. The `plus` operation only applies to numbers, not the word \"two\".\n",
+    "\n",
+    "2. The name \"two\" cannot be assigned to the number 3.\n",
+    "\n",
+    "3. Two plus two is four, not six.\n",
+    "\n",
+    "4. Python cannot interpret the name `two` followed directly by a name that has not been defined.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q3_2\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names_q2 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q3_2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Question 3.** When you run the following cell, Python will, yet again, produce another cryptic error message."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = print(5)\n",
+    "y = x + 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "Choose the best explanation of what's wrong with the code and assign 1, 2, or 3 to `names_q3` below to indicate your answer.\n",
+    "\n",
+    "1. Python doesn't want `y` to be assigned.\n",
+    "\n",
+    "2. The `print` operation is meant for displaying values to the programmer, not for assigning values!\n",
+    "\n",
+    "3. Python can’t do addition between one name and one number. It has to be 2 numbers or 2 predefined names.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q3_3\n",
+    "manual: false\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "names_q3 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q3_3\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Job Opportunities & Education in Rural India\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A [study](http://www.nber.org/papers/w16021.pdf) at UCLA investigated factors that might result in greater attention to the health and education of girls in rural India. One such factor is information about job opportunities for women. The idea is that if people know that educated women can get good jobs, they might take more care of the health and education of girls in their families, as an investment in the girls’ future potential as earners. Without the knowledge of job opportunities, the author hypothesizes that families do not invest in women’s well-being.\n",
+    "\n",
+    "The study focused on 160 villages outside the capital of India, all with little access to information about call centers and similar organizations that offer job opportunities to women. In 80 of the villages chosen at random, recruiters visited the village, described the opportunities, recruited women who had some English language proficiency and experience with computers, and provided ongoing support free of charge for three years. In the other 80 villages, no recruiters visited and no other intervention was made.\n",
+    "\n",
+    "At the end of the study period, the researchers recorded data about the school attendance and health of the children in the villages."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 1.** Which statement best describes the *treatment* and *control* groups for this study? Assign either 1, 2, or 3 to the name `jobs_q1` below.\n",
+    "\n",
+    "1. The treatment group was the 80 villages visited by recruiters, and the control group was the other 80 villages with no intervention.\n",
+    "\n",
+    "2. The treatment group was the 160 villages selected, and the control group was the rest of the villages outside the capital of India.\n",
+    "\n",
+    "3. There is no clear notion of *treatment* and *control* group in this study.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q4_1\n",
+    "manual: false\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jobs_q1 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q4_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 2.** Was this an observational study or a randomized controlled experiment? Assign either 1, 2, or 3 to the name `jobs_q2` below.\n",
+    "\n",
+    "1. This was an observational study.\n",
+    "\n",
+    "2. This was a randomized controlled experiment.  \n",
+    "\n",
+    "3. This was a randomized observational study.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q4_2\n",
+    "manual: false\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jobs_q2 = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q4_2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 3.** The study reported, \"Girls aged 5-15 in villages that received the recruiting services were 3 to 5 percentage points more likely to be in school and experienced an increase in Body Mass Index, reflecting greater nutrition and/or medical care. However, there was no net gain in height. For boys, there was no change in any of these measures.\" Why do you think the author points out the lack of change in the boys?\n",
+    "\n",
+    "*Hint:* Remember the original hypothesis. The author believes that educating women in job opportunities will cause families to invest more in the women’s well-being.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q4_3\n",
+    "manual: true\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "\n",
+    "\n",
+    "## 5. Differences between Majors\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Berkeley’s Office of Planning and Analysis provides data on numerous aspects of the campus. Adapted from the OPA website, the table below displays the numbers of degree recipients in three majors in the academic years 2008-2009 and 2017-2018.\n",
+    "\n",
+    "| Major                              | 2008-2009    | 2017-2018   |\n",
+    "|------------------------------------|--------------|-------------|\n",
+    "| Gender and Women's Studies         |      17      |    28       |\n",
+    "| Linguistics                        |      49      |    67       |\n",
+    "| Rhetoric                           |      113     |    56       |\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 1.** Suppose you want to find the **biggest** absolute difference between the numbers of degree recipients in the two years, among the three majors.\n",
+    "\n",
+    "In the cell below, compute this value and call it `biggest_change`. Use a single expression (a single line of code) to compute the answer. Let Python perform all the arithmetic (like subtracting 49 from 67) rather than simplifying the expression yourself. The built-in `abs` function takes a numerical input and returns the absolute value. The built-in `max` function can take in 3 arguments and returns the maximum of the three numbers.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q5_1\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "export_pdf": true
+   },
+   "outputs": [],
+   "source": [
+    "biggest_change = ...\n",
+    "biggest_change"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q5_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "**Question 2.** Which of the three majors had the **smallest** absolute difference? Assign `smallest_change_major` to 1, 2, or 3 where each number corresponds to the following major:\n",
+    "\n",
+    "1: Gender and Women's Studies  \n",
+    "2: Linguistics  \n",
+    "3: Rhetoric\n",
+    "\n",
+    "Choose the number that corresponds to the major with the smallest absolute difference.\n",
+    "\n",
+    "You should be able to answer by rough mental arithmetic, without having to calculate the exact value for each major. \n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q5_2\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "smallest_change_major = ...\n",
+    "smallest_change_major"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q5_2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 3.**  For each major, define the \"relative change\" to be the following: $\\large{\\frac{\\text{absolute difference}}{\\text{value in 2008-2009}} * 100}$ \n",
+    "\n",
+    "Fill in the code below such that `gws_relative_change`, `linguistics_relative_change` and `rhetoric_relative_change` are assigned to the relative changes for their respective majors.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q5_3\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "for_assignment_type": "student"
+   },
+   "outputs": [],
+   "source": [
+    "gws_relative_change = (abs(...) / 17) * 100\n",
+    "linguistics_relative_change = ...\n",
+    "rhetoric_relative_change = ...\n",
+    "gws_relative_change, linguistics_relative_change, rhetoric_relative_change"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q5_3\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 4.** Assign `biggest_rel_change_major` to 1, 2, or 3 where each number corresponds to the following: \n",
+    "\n",
+    "1: Gender and Women's Studies  \n",
+    "2: Linguistics  \n",
+    "3: Rhetoric\n",
+    "\n",
+    "Choose the number that corresponds to the major with the biggest relative change.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q5_4\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Assign biggest_rel_change_major to the number corresponding to the major with the biggest relative change.\n",
+    "biggest_rel_change_major = ...\n",
+    "biggest_rel_change_major"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q5_4\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Nearsightedness Study\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Myopia, or nearsightedness, results from a number of genetic and environmental factors. In 1999, Quinn et al studied the relation between myopia and ambient lighting at night (for example, from nightlights or room lights) during childhood."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 1.** The data were gathered by the following procedure, reported in the study. \"Between January and June 1998, parents of children aged 2-16 years [...] that were seen as outpatients in a university pediatric ophthalmology clinic completed a questionnaire on the child’s light exposure both at present and before the age of 2 years.\" Was this study observational, or was it a controlled experiment? Explain. \n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q6_1\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 2.** The study found that of the children who slept with a room light on before the age of 2, 55% were myopic. Of the children who slept with a night light on before the age of 2, 34% were myopic. Of the children who slept in the dark before the age of 2, 10% were myopic. The study concluded that, \"The prevalence of myopia [...] during childhood was strongly associated with ambient light exposure during sleep at night in the first two years after birth.\"\n",
+    "\n",
+    "Do the data support this statement? You may interpret \"strongly\" in any reasonable qualitative way.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q6_2\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 3.** On May 13, 1999, CNN reported the results of this study under the headline, \"Night light may lead to nearsightedness.\" Does the conclusion of the study claim that night light causes nearsightedness?\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q6_3\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "<!-- BEGIN QUESTION -->\n",
+    "\n",
+    "**Question 4.** The final paragraph of the CNN report said that \"several eye specialists\" had pointed out that the study should have accounted for heredity.\n",
+    "\n",
+    "Myopia is passed down from parents to children. Myopic parents are more likely to have myopic children, and may also be more likely to leave lights on habitually (since the parents have poor vision). In what way does the knowledge of this possible genetic link affect how we interpret the data from the study? \n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q6_4\n",
+    "manual: True\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "_Type your answer here, replacing this text._"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<!-- END QUESTION -->\n",
+    "\n",
+    "\n",
+    "\n",
+    "## 7. Studying the Survivors\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "The Reverend Henry Whitehead was skeptical of John Snow’s conclusion about the Broad Street pump. After the Broad Street cholera epidemic ended, Whitehead set about trying to prove Snow wrong.  (The history of the event is detailed [here](http://www.ncbi.nlm.nih.gov/pmc/articles/PMC1034367/pdf/medhist00183-0026.pdf).)\n",
+    "\n",
+    "He realized that Snow had focused his analysis almost entirely on those who had died. Whitehead, therefore, investigated the drinking habits of people in the Broad Street area who had not died in the outbreak.\n",
+    "\n",
+    "What is the main reason it was important to study this group?\n",
+    "\n",
+    "1) If Whitehead had found that many people had drunk water from the Broad Street pump and not caught cholera, that would have been evidence against Snow's hypothesis.\n",
+    "\n",
+    "2) Survivors could provide additional information about what else could have caused the cholera, potentially unearthing another cause.\n",
+    "\n",
+    "3) Through considering the survivors, Whitehead could have identified a cure for cholera.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q7_1\n",
+    "manual: False\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Assign survivor_answer to 1, 2, or 3\n",
+    "survivor_answer = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q7_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note:** Whitehead ended up finding further proof that the Broad Street pump played the central role in spreading the disease to the people who lived near it. Eventually, he became one of Snow’s greatest defenders."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Policies and Administrivia\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This section of the homework is to ensure that you have read over the policies and frequently asked questions for the course. \n",
+    "\n",
+    "**It's important that you read through this section of the homework very carefully**. If you can get through all of this section and are sure you have all of the correct resources set up, you will be able to focus on the actual material this semester!\n",
+    "\n",
+    "Reading through the [policies](http://data8.org/sp20/policies.html) and the [FAQ](http://data8.org/sp20/faq.html) will help you get through this section very easily. It is recommended that you do this before answering the questions below. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 1:** You have a question regarding the grading of your assignments that has not been previously answered on Piazza or the FAQ. Who do you contact? Assign `contact` to the number corresponding to the best choice below. \n",
+    "\n",
+    "1. The Instructors\n",
+    "2. Post on Piazza\n",
+    "3. Contact your Lab TA\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_1\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "contact = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_1\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 2:** Why will the grades on Gradescope and OkPy be different? Assign `grades` to the number corresponding to the best choice below. \n",
+    "\n",
+    "1. There was a mistake in the grading. I should contact someone about this\n",
+    "2. Gradescope grades the written portion, while OkPy grades the coded portion\n",
+    "3. Trick question; the grades should be the same on both platforms\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_2\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grades = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_2\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 3:** Regrade deadline dates will always be posted on the same Piazza post that releases the assignment grades, common mistakes, and solutions. Can you ask for parts of your assignment to be regraded after the regrade request window has passed? Assign `regrade` to the number corresponding to the best choice below. \n",
+    "\n",
+    "1. Yes\n",
+    "2. No\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_3\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "regrade = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_3\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 4:** Do you have a Gradescope account? Head to [gradescope.com](http://gradescope.com) and check if you see Data 8. If you do not, please send your Lab TA an email with your email and student ID number. \n",
+    "\n",
+    "Once you have been enrolled, go to the Data 8 Gradescope course website. At the end of the url (link), you should see a number. Assign `gradescope` to that number. \n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_4\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gradescope = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_4\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 5:** Given the following scenarios, assign `acceptable` to the number of the scenario that is permissible given the guidelines on the [policies](http://data8.org/sp20/policies.html) page. \n",
+    "\n",
+    "1. Alice gets stuck on a homework assignment, so she googles a fix. She stumbles across a pdf of the solutions for the homework assignment from a previous semester's offering of Data 8. After inspecting the solution, Alice writes her own solution and submits the assignment.\n",
+    "\n",
+    "2. After getting confused by a project, Bob asks his friend for help. His friend helps by walking the student through his own logic, pointing out areas that are important given the context of the question. Upon hearing his friend's logic, Bob writes his own code and completes the project.\n",
+    "\n",
+    "3. Eve has an extremely busy schedule, so she really wants to leave lab early by finishing it and getting checked off. Her neighbor, Charlie, simply turns his computer so Eve can see how he completed some questions. After looking at his code, Eve finishes the lab and gets checked off.\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_5\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "acceptable = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_5\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 6:** To make sure you have read through the [policies](http://data8.org/sp20/policies.html) and the [FAQ](http://data8.org/sp20/faq.html) carefully, how many HW/lab drops are there? Assign `drops` to the number corresponding to the best choice below. \n",
+    "\n",
+    "1. Two homework drops and one lab drop\n",
+    "2. One homework drop and one lab drop\n",
+    "3. Only one homework drop\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_6\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drops = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_6\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 7:** Does Data 8 offer any alternate exams? Assign `exams` to the number corresponding to the best choice below. \n",
+    "\n",
+    "1. Yes\n",
+    "2. No\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_7\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "exams = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_7\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "**Question 8:** Are you actually checking Piazza? Go to this semester's [Data 8 Piazza](https://piazza.com/class/k5fwiw4wql642x), and find an instructor posted thread with a certain secret phrase. Assign `secret` to this secret phrase in quotes (i.e., as a string).\n",
+    "\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q8_8\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "secret = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q8_8\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 9. Welcome Survey\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once you have submitted, please also complete the welcome survey in order to receive credit for homework 1.\n",
+    "\n",
+    "Welcome survey is here: https://docs.google.com/forms/d/e/1FAIpQLSd28-DvELnGk4n6lHcqMOWcsovDulNSbhmlLFXqDMQIsdldaQ/viewform?usp=sf_link"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "Assign `survey` to the secret string given at the end of the welcome survey:\n",
+    "<!--\n",
+    "BEGIN QUESTION\n",
+    "name: q9\n",
+    "-->"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "survey = ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check(\"q9\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "---\n",
+    "\n",
+    "To double-check your work, the cell below will rerun all of the autograder tests."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "grader.check_all()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "source": [
+    "## Submission\n",
+    "\n",
+    "Make sure you have run all cells in your notebook in order before running the cell below, so that all images/graphs appear in the output. The cell below will generate a zip file for you to submit. **Please save before exporting!**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "deletable": false,
+    "editable": false
+   },
+   "outputs": [],
+   "source": [
+    "# Save your notebook first, then run this cell to export your submission.\n",
+    "grader.export(pdf=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " "
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "None",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/tests/test_zip_puller.py b/tests/test_zip_puller.py
new file mode 100644
index 00000000..6d7988fe
--- /dev/null
+++ b/tests/test_zip_puller.py
@@ -0,0 +1,55 @@
+import os
+import pytest
+import shutil
+from nbgitpuller.plugins.zip_puller import ZipSourceWebDownloader
+from nbgitpuller.plugins.zip_puller import ZipSourceDropBoxDownloader
+from nbgitpuller.plugins.zip_puller import ZipSourceGoogleDriveDownloader
+
+test_files_dir = os.getcwd() + "/tests/test_files"
+archive_base = "/tmp/test_files"
+repo_parent_dir = "/tmp/fake/"
+repo_zip = 'file://' + archive_base + ".zip"
+repo_tgz = 'file://' + archive_base + ".tar.gz"
+
+
+@pytest.fixture
+def test_configuration():
+    shutil.make_archive(archive_base, 'zip', test_files_dir)
+    shutil.make_archive(archive_base, 'gztar', test_files_dir)
+    os.makedirs(repo_parent_dir, exist_ok=True)
+    yield "test finishing"
+    shutil.rmtree(repo_parent_dir)
+    os.remove(archive_base + ".zip")
+    os.remove(archive_base + ".tar.gz")
+
+
+def assert_helper(down, zip, tgz):
+    resp_zip = down.handle_files(zip, repo_parent_dir)
+    resp_tgz = down.handle_files(tgz, repo_parent_dir)
+    assert "unzip_dir" in resp_zip
+    assert "origin_repo_path" in resp_zip
+    assert f"{repo_parent_dir}.origin_non_git_sources" in resp_zip["origin_repo_path"]
+    assert "hw" in resp_zip["unzip_dir"]
+    assert "unzip_dir" in resp_tgz
+    assert "origin_repo_path" in resp_tgz
+    assert f"{repo_parent_dir}.origin_non_git_sources" in resp_tgz["origin_repo_path"]
+    assert "hw" in resp_tgz["unzip_dir"]
+
+
+def test_web_downloader(test_configuration):
+    down = ZipSourceWebDownloader()
+    assert_helper(down, repo_zip, repo_tgz)
+
+
+def test_dropbox_downloader(test_configuration):
+    down = ZipSourceDropBoxDownloader()
+    drop_repo_zip = repo_zip + "?dl=0"
+    drop_repo_tgz = repo_tgz + "?dl=0"
+    assert_helper(down, drop_repo_zip, drop_repo_tgz)
+
+
+def test_google_get_id():
+    down = ZipSourceGoogleDriveDownloader()
+    google_repo = "https://drive.google.com/file/d/1p3m0h5UGWdLkVVP0SSJH6j1HpG2yeDlU/view?usp=sharing"
+    file_id = down.get_id(google_repo)
+    assert file_id == "1p3m0h5UGWdLkVVP0SSJH6j1HpG2yeDlU"