diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 000000000..e489f78c5
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,117 @@
+
+name: tests
+
+on:
+ push:
+ branches: [master, v2]
+    paths-ignore: ["docs/**"]
+ pull_request:
+ branches: [master, v2]
+    paths-ignore: ["docs/**"]
+ schedule:
+ # Run every Sunday
+ - cron: "0 0 * * 0"
+ workflow_dispatch:
+
+jobs:
+ code-quality:
+ name: Code Quality
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+ cache: "pip"
+ cache-dependency-path: |
+ pyproject.toml
+ dev-requirements.txt
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ make requirements
+
+ - name: Lint
+ run: |
+ make lint
+
+ tests:
+ name: Tests (${{ matrix.os }}, Python ${{ matrix.python-version }})
+ needs: code-quality
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-latest]
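+        # 3.10 and later must be quoted so YAML does not parse them as floats (3.10 -> 3.1)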
+ python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
+ defaults:
+ run:
+ shell: bash
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ allow-prereleases: true
+ cache: "pip"
+ cache-dependency-path: |
+ pyproject.toml
+ dev-requirements.txt
+
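+      # conda needs to be on PATH since generated projects can choose it as their environment manager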
+ - name: Set up Miniconda
+ uses: conda-incubator/setup-miniconda@v3
+ with:
+ auto-activate-base: true
+ activate-environment: ""
+
+ - name: Cache conda packages
+ uses: actions/cache@v4
+ env:
+ # Increase this value to reset cache explicitly
+ CACHE_NUMBER: 0
+ with:
+ path: ~/conda_pkgs_dir
+ key:
+ ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
+ hashFiles('ccds/hook_utils/dependencies.py') }}
+
+ - name: Setup for Windows
+ if: ${{ matrix.os == 'windows-latest' }}
+ run: |
+ # Install GNU make
+ choco install --verbose make
+ # Add conda binaries directory to PATH so that conda is accessible
+ echo "${CONDA}\Scripts" >> $GITHUB_PATH
+ # Set bash executable explicitly since Make may pick wrong shell
+ echo "BASH_EXECUTABLE=$(which bash)" >> "$GITHUB_ENV"
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ make requirements
+
+ - name: Check dependencies
+ run: |
+ if [[ -z "${BASH_EXECUTABLE}" ]]; then
+ bash --version
+ else
+ echo $BASH_EXECUTABLE
+ $BASH_EXECUTABLE --version
+ fi
+ which make
+ make --version
+ which conda
+ conda --version
+ which pipenv
+ pipenv --version
+ which virtualenv
+ virtualenv --version
+
+ - name: Run tests
+ run: |
+ make test
diff --git a/.gitignore b/.gitignore
index eb9b79f8e..2f91f4ed0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,177 @@
+# generated docs items
docs/site/
+docs/docs/_partials/termynal.md
+docs/docs/_partials/*/*.html
-# OSX Junk
+# test cache
+manual_test/
+
+# other local dev info
+.vscode/
+
+# Mac OS-specific storage files
.DS_Store
-# test cache
-.cache/*
-tests/__pycache__/*
-*.pytest_cache/
\ No newline at end of file
+# vim
+*.swp
+*.swo
+
+## https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/LICENSE b/LICENSE
index 3981d8a60..a3de0161a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -5,4 +5,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..4307ee1cc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,66 @@
+.PHONY: _prep create_environment requirements format lint docs docs-serve test \
+    test-fastest test-debug-last _clean_manual_test manual-test manual-test-debug
+
+## GLOBALS
+
+PROJECT_NAME = cookiecutter-data-science
+PYTHON_VERSION = 3.10
+PYTHON_INTERPRETER = python
+
+
+### UTILITIES
+_prep:
+ rm -f **/*/.DS_store
+
+
+### DEV COMMANDS
+
+## Set up python interpreter environment
+create_environment:
+ conda create --name $(PROJECT_NAME) python=$(PYTHON_VERSION) -y
+ @echo ">>> conda env created. Activate with:\nconda activate $(PROJECT_NAME)"
+
+## Install Python Dependencies
+requirements:
+ $(PYTHON_INTERPRETER) -m pip install -r dev-requirements.txt
+
+## Format the code using isort and black
+format:
+ isort --profile black ccds hooks tests docs/scripts
+ black ccds hooks tests docs/scripts
+
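+## Lint using flake8, isort, and black (checks only; `make format` applies fixes)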
+lint:
+ flake8 ccds hooks tests docs/scripts
+ isort --check --profile black ccds hooks tests docs/scripts
+ black --check ccds hooks tests docs/scripts
+
+
+### DOCS
+
+docs:
+ cd docs && mkdocs build
+
+docs-serve:
+ cd docs && mkdocs serve
+
+### TESTS
+
+test: _prep
+ pytest -vvv --durations=0
+
+test-fastest: _prep
+ pytest -vvv -FFF
+
+test-debug-last:
+ pytest --lf --pdb
+
+_clean_manual_test:
+ rm -rf manual_test
+
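+## Run the template interactively by hand, writing output into manual_test/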
+manual-test: _prep _clean_manual_test
+ mkdir -p manual_test
+ cd manual_test && python -m ccds ..
+
+manual-test-debug: _prep _clean_manual_test
+ mkdir -p manual_test
+ cd manual_test && python -m pdb ../ccds/__main__.py ..
diff --git a/README.md b/README.md
index 49a1eb384..62e5b631a 100644
--- a/README.md
+++ b/README.md
@@ -1,54 +1,43 @@
# Cookiecutter Data Science
-_A logical, reasonably standardized, but flexible project structure for doing and sharing data science work._
+_A logical, reasonably standardized but flexible project structure for doing and sharing data science work._
+**Cookiecutter Data Science (CCDS)** is a tool for setting up a data science project template that incorporates best practices. To learn more about CCDS's philosophy, visit the [project homepage](https://drivendata.github.io/cookiecutter-data-science/).
-#### [Project homepage](http://drivendata.github.io/cookiecutter-data-science/)
+> ℹ️ Cookiecutter Data Science v2 has changed from v1. It now requires installing the new cookiecutter-data-science Python package, which extends the functionality of the [cookiecutter](https://cookiecutter.readthedocs.io/en/stable/README.html) templating utility. Use the provided `ccds` command-line program instead of `cookiecutter`.
+## Installation
-### Requirements to use the cookiecutter template:
------------
- - Python 2.7 or 3.5+
- - [Cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: This can be installed with pip by or conda depending on how you manage your Python packages:
+Cookiecutter Data Science v2 requires Python 3.8+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
-``` bash
-$ pip install cookiecutter
-```
+```bash
+# With pipx from PyPI (recommended)
+pipx install cookiecutter-data-science
-or
+# With pip from PyPI
+pip install cookiecutter-data-science
-``` bash
-$ conda config --add channels conda-forge
-$ conda install cookiecutter
+# With conda from conda-forge (coming soon)
+# conda install cookiecutter-data-science -c conda-forge
```
+## Starting a new project
-### To start a new project, run:
-------------
-
- cookiecutter -c v1 https://github.com/drivendata/cookiecutter-data-science
+To start a new project, run:
+```bash
+ccds
+```
[![asciicast](https://asciinema.org/a/244658.svg)](https://asciinema.org/a/244658)
-### New version of Cookiecutter Data Science
-------------
-Cookiecutter data science is moving to v2 soon, which will entail using
-the command `ccds ...` rather than `cookiecutter ...`. The cookiecutter command
-will continue to work, and this version of the template will still be available.
-To use the legacy template, you will need to explicitly use `-c v1` to select it.
-Please update any scripts/automation you have to append the `-c v1` option (as above),
-which is available now.
-
-
### The resulting directory structure
-------------
-The directory structure of your new project looks like this:
+The directory structure of your new project will look something like this (depending on the settings that you choose):
```
-├── LICENSE
-├── Makefile <- Makefile with commands like `make data` or `make train`
+├── LICENSE <- Open-source license if one is chosen
+├── Makefile <- Makefile with convenience commands like `make data` or `make train`
├── README.md <- The top-level README for developers using this project.
├── data
│ ├── external <- Data from third party sources.
@@ -56,7 +45,7 @@ The directory structure of your new project looks like this:
│ ├── processed <- The final, canonical data sets for modeling.
│ └── raw <- The original, immutable data dump.
│
-├── docs <- A default Sphinx project; see sphinx-doc.org for details
+├── docs <- A default mkdocs project; see mkdocs.org for details
│
├── models <- Trained and serialized models, model predictions, or model summaries
│
@@ -64,6 +53,9 @@ The directory structure of your new project looks like this:
│ the creator's initials, and a short `-` delimited description, e.g.
│ `1.0-jqp-initial-data-exploration`.
│
+├── pyproject.toml <- Project configuration file with package metadata for {{ cookiecutter.module_name }}
+│ and configuration for tools like black
+│
├── references <- Data dictionaries, manuals, and all other explanatory materials.
│
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
@@ -72,25 +64,35 @@ The directory structure of your new project looks like this:
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
│
-├── setup.py <- makes project pip installable (pip install -e .) so src can be imported
-├── src <- Source code for use in this project.
-│ ├── __init__.py <- Makes src a Python module
-│ │
-│ ├── data <- Scripts to download or generate data
-│ │ └── make_dataset.py
-│ │
-│ ├── features <- Scripts to turn raw data into features for modeling
-│ │ └── build_features.py
-│ │
-│ ├── models <- Scripts to train models and then use trained models to make
-│ │ │ predictions
-│ │ ├── predict_model.py
-│ │ └── train_model.py
-│ │
-│ └── visualization <- Scripts to create exploratory and results oriented visualizations
-│ └── visualize.py
+├── setup.cfg <- Configuration file for flake8
│
-└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
+└── {{ cookiecutter.module_name }} <- Source code for use in this project.
+ │
+ ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module
+ │
+ ├── data <- Scripts to download or generate data
+ │ └── make_dataset.py
+ │
+ ├── features <- Scripts to turn raw data into features for modeling
+ │ └── build_features.py
+ │
+ ├── models <- Scripts to train models and then use trained models to make
+ │ │ predictions
+ │ ├── predict_model.py
+ │ └── train_model.py
+ │
+ └── visualization <- Scripts to create exploratory and results oriented visualizations
+ └── visualize.py
+```
+
+## Using v1
+
+If you want to use the old v1 project template, you need to have either the cookiecutter-data-science package or cookiecutter package installed. Then, use either command-line program with the `-c v1` option:
+
+```bash
+ccds https://github.com/drivendata/cookiecutter-data-science -c v1
+# or equivalently
+cookiecutter https://github.com/drivendata/cookiecutter-data-science -c v1
```
## Contributing
@@ -98,11 +100,13 @@ The directory structure of your new project looks like this:
We welcome contributions! [See the docs for guidelines](https://drivendata.github.io/cookiecutter-data-science/#contributing).
### Installing development requirements
-------------
- pip install -r requirements.txt
+```bash
+pip install -r dev-requirements.txt
+```
### Running the tests
-------------
- py.test tests
+```bash
+pytest tests
+```
diff --git a/ccds-help.json b/ccds-help.json
new file mode 100644
index 000000000..84b26b8cd
--- /dev/null
+++ b/ccds-help.json
@@ -0,0 +1,281 @@
+[
+ {
+ "field": "project_name",
+ "help": {
+ "description": "A name for the project, for example 'My Project'.",
+ "more_information": ""
+ }
+ },
+ {
+ "field": "repo_name",
+ "help": {
+ "description": "Default generated by altering the `project_name`. Used for folder and repo name for the project.",
+ "more_information": ""
+ }
+ },
+ {
+ "field": "module_name",
+ "help": {
+ "description": "Default generated by altering the `project_name` to be a compatible Python module name.",
+ "more_information": ""
+ }
+ },
+ {
+ "field": "author_name",
+ "help": {
+ "description": "Name of the individual or organization that created the project.",
+ "more_information": ""
+ }
+ },
+ {
+ "field": "description",
+ "help": {
+ "description": "A short description that appears in the README.md file by default.",
+ "more_information": ""
+ }
+ },
+ {
+ "field": "python_version_number",
+ "help": {
+ "description": "The version of Python that the project will use.",
+ "more_information": "[Python version status](https://devguide.python.org/versions/)"
+ }
+ },
+ {
+ "field": "dataset_storage",
+ "help": {
+ "description": "A cloud storage location for where data should be stored; controls `sync_data_up` and `sync_data_down` Makefile commands. You will be asked for different additional configuration information depending on your choice.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "none",
+ "help": {
+ "description": "No cloud storage configured; Makefile commands to sync data removed.",
+ "more_information": ""
+ }
+ },
+ {
+ "choice": "azure",
+ "help": {
+ "description": "Store data on Azure Blob Storage.",
+ "more_information": "[Docs](https://learn.microsoft.com/en-us/azure/storage/blobs/)"
+ },
+ "subfields": [
+ {
+ "field": "container",
+ "help": {
+ "description": "Name of the container on blob storage.",
+ "more_information": "[Docs](https://learn.microsoft.com/en-us/azure/storage/blobs/blob-containers-portal)"
+ }
+ }
+ ]
+ },
+ {
+ "choice": "s3",
+ "help": {
+ "description": "Store data on Amazon S3.",
+ "more_information": "[Docs](https://docs.aws.amazon.com/s3/)"
+ },
+ "subfields": [
+ {
+ "field": "bucket",
+ "help": {
+ "description": "The name of the bucket to store data in; can also be a longer S3 path.",
+ "more_information": "[Docs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html#BasicsBucket)"
+ }
+ },
+ {
+ "field": "aws_profile",
+ "help": {
+ "description": "The name of the profile to use for the aws CLI.",
+ "more_information": "[Docs](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html)"
+ }
+ }
+ ]
+ },
+ {
+ "choice": "gcs",
+ "help": {
+ "description": "Store data to Google Cloud Storage",
+ "more_information": "[Docs](https://cloud.google.com/storage/docs)"
+ },
+ "subfields": [
+ {
+ "field": "bucket",
+ "help": {
+ "description": "The name of the bucket to store data in.",
+ "more_information": "[Docs](https://cloud.google.com/storage/docs/buckets)"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "field": "environment_manager",
+ "help": {
+ "description": "Tool for managing creating Python environments. Controls `make create_environment` Makefile command.",
+ "more_information": "[About virtual environments](https://www.dataquest.io/blog/a-complete-guide-to-python-virtual-environments/)"
+ },
+ "choices": [
+ {
+ "choice": "virtualenv",
+ "help": {
+ "description": "Simple wrapper on venv with more features.",
+ "more_information": "[Docs](https://virtualenv.pypa.io/en/latest/)"
+ }
+ },
+ {
+ "choice": "conda",
+ "help": {
+ "description": "Data science environments supporting non-Python dependencies as well.",
+ "more_information": "[Docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)"
+ }
+ },
+ {
+ "choice": "pipenv",
+ "help": {
+ "description": "Manages dependencies and virual environments.",
+ "more_information": "[Docs](https://pipenv.pypa.io/en/latest/)"
+ }
+ },
+ {
+ "choice": "none",
+ "help": {
+ "description": "Do not add `create_environment` commands; env management left to the user.",
+ "more_information": ""
+ }
+ }
+ ]
+ },
+ {
+ "field": "dependency_file",
+ "help": {
+ "description": "Where to track project-specific dependencies; often paired with specific environment manager.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "requirements.txt",
+ "help": {
+ "description": "Most general, least feature-rich format for use with `pip`.",
+ "more_information": "[pip docs](https://pip.pypa.io/en/stable/reference/requirements-file-format/)"
+ }
+ },
+ {
+ "choice": "environment.yml",
+ "help": {
+ "description": "Format used by `conda`.",
+ "more_information": "[Docs](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#sharing-an-environment)"
+ }
+ },
+ {
+ "choice": "Pipfile",
+ "help": {
+ "description": "Format used by Pipenv",
+ "more_information": "[Docs](https://pipenv.pypa.io/en/latest/pipfile.html)"
+ }
+ }
+ ]
+ },
+ {
+ "field": "pydata_packages",
+ "help": {
+ "description": "Packages automatically added to your requirements file.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "none",
+ "help": {
+ "description": "No extra packages.",
+ "more_information": ""
+ }
+ },
+ {
+ "choice": "basic",
+ "help": {
+ "description": "Basic common data science packages.",
+ "more_information": ""
+ }
+ }
+ ]
+ },
+ {
+ "field": "open_source_license",
+ "help": {
+ "description": "Whether to include a license file and which one to use.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "No license file",
+ "help": {
+ "description": "No license file will be included.",
+ "more_information": ""
+ }
+ },
+ {
+ "choice": "MIT",
+ "help": {
+ "description": "MIT License",
+ "more_information": "[License](https://opensource.org/license/mit)"
+ }
+ },
+ {
+ "choice": "BSD-3-Clause",
+ "help": {
+ "description": "3-Clause BSD License",
+ "more_information": "[License](https://opensource.org/license/bsd-3-clause)"
+ }
+ }
+ ]
+ },
+ {
+ "field": "docs",
+ "help": {
+ "description": "Whether to include a `docs` folder and documentation tools.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "none",
+ "help": {
+ "description": "No `docs` folder.",
+ "more_information": ""
+ }
+ },
+ {
+ "choice": "mkdocs",
+ "help": {
+ "description": "Include `docs` folder and `mkdocs` tools.",
+ "more_information": "[Docs](https://www.mkdocs.org/)"
+ }
+ }
+ ]
+ },
+ {
+ "field": "include_code_scaffold",
+ "help": {
+ "description": "Whether to include some basic boilerplate code in the Python module.",
+ "more_information": ""
+ },
+ "choices": [
+ {
+ "choice": "Yes",
+ "help": {
+ "description": "Includes common data submodules.",
+ "more_information": ""
+ }
+ },
+ {
+ "choice": "No",
+ "help": {
+ "description": "Empty Python module to start with.",
+ "more_information": ""
+ }
+ }
+ ]
+ }
+]
diff --git a/ccds.json b/ccds.json
new file mode 100644
index 000000000..2ad0a0ee2
--- /dev/null
+++ b/ccds.json
@@ -0,0 +1,32 @@
+{
+ "project_name": "project_name",
+ "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
+ "module_name": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}",
+ "author_name": "Your name (or your organization/company/team)",
+ "description": "A short description of the project.",
+ "python_version_number": "3.10",
+ "dataset_storage": [
+ {"none": "none"},
+ {"azure": {"container": "container-name"}},
+ {"s3": {"bucket": "bucket-name", "aws_profile": "default"}},
+ {"gcs": {"bucket": "bucket-name"}}
+ ],
+ "environment_manager": [
+ "virtualenv",
+ "conda",
+ "pipenv",
+ "none"
+ ],
+ "dependency_file": [
+ "requirements.txt",
+ "environment.yml",
+ "Pipfile"
+ ],
+ "pydata_packages": [
+ "none",
+ "basic"
+ ],
+ "open_source_license": ["No license file", "MIT", "BSD-3-Clause"],
+ "docs": ["mkdocs", "none"],
+ "include_code_scaffold": ["Yes", "No"]
+}
\ No newline at end of file
diff --git a/{{ cookiecutter.repo_name }}/src/__init__.py b/ccds/__init__.py
similarity index 100%
rename from {{ cookiecutter.repo_name }}/src/__init__.py
rename to ccds/__init__.py
diff --git a/ccds/__main__.py b/ccds/__main__.py
new file mode 100644
index 000000000..b15effe7b
--- /dev/null
+++ b/ccds/__main__.py
@@ -0,0 +1,41 @@
+# Monkey-patch jinja to allow variables to not exist, which happens with sub-options
+import jinja2
+
+jinja2.StrictUndefined = jinja2.Undefined
+
+
+# Monkey-patch cookiecutter to allow sub-items
+from cookiecutter import prompt
+
+from ccds.monkey_patch import prompt_for_config
+
+prompt.prompt_for_config = prompt_for_config
+
+
+# monkey-patch context to point to ccds.json
+from cookiecutter import generate
+
+from ccds.monkey_patch import generate_context_wrapper
+
+generate.generate_context = generate_context_wrapper
+
+# for use in tests need monkey-patched api main
+from cookiecutter import cli
+from cookiecutter import main as api_main # noqa: F401 referenced by tests
+
+
+def default_ccds_main(f):
+ """Set the default for the cookiecutter template argument to the CCDS template."""
+
+ def _main(*args, **kwargs):
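+        # Assumes cookiecutter's click command takes TEMPLATE as its second
+        # parameter (f.params[1]); repoint its default at the CCDS template.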
+ f.params[1].default = "https://github.com/drivendata/cookiecutter-data-science"
+ return f(*args, **kwargs)
+
+ return _main
+
+
+main = default_ccds_main(cli.main)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/{{ cookiecutter.repo_name }}/src/data/__init__.py b/ccds/hook_utils/__init__.py
similarity index 100%
rename from {{ cookiecutter.repo_name }}/src/data/__init__.py
rename to ccds/hook_utils/__init__.py
diff --git a/ccds/hook_utils/custom_config.py b/ccds/hook_utils/custom_config.py
new file mode 100644
index 000000000..673aa5f03
--- /dev/null
+++ b/ccds/hook_utils/custom_config.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+from shutil import copytree
+from tempfile import TemporaryDirectory
+from urllib.request import urlretrieve
+from zipfile import ZipFile
+
+from cookiecutter.vcs import clone
+
+
+def write_custom_config(user_input_config):
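+    """Copy user-supplied custom configuration into the generated project.
+
+    `user_input_config` may be a local directory, a local or downloadable
+    .zip archive, or a VCS URI that cookiecutter can clone.
+    """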
+ if not user_input_config:
+ return
+
+ tmp = TemporaryDirectory()
+ tmp_zip = None
+
+ print(user_input_config)
+
+ # if not absolute, test if local path relative to parent of created directory
+ if not user_input_config.startswith("/"):
+ test_path = Path("..") / user_input_config
+ else:
+ test_path = Path(user_input_config)
+
+ # check if user passed a local path
+ if test_path.exists() and test_path.is_dir():
+ local_path = test_path
+
+    elif test_path.exists() and str(test_path).endswith(".zip"):
+ tmp_zip = test_path
+
+ # check if user passed a url to a zip
+ elif user_input_config.startswith("http") and (
+ user_input_config.split(".")[-1] in ["zip"]
+ ):
+ tmp_zip, _ = urlretrieve(user_input_config)
+
+ # assume it is a VCS uri and try to clone
+ else:
+        # cookiecutter's `clone` returns the path to the cloned repo inside the temp dir
+        local_path = clone(user_input_config, clone_to_dir=tmp.name)
+
+    if tmp_zip:
+        with ZipFile(tmp_zip, "r") as zipf:
+            zipf.extractall(tmp.name)
+        local_path = tmp.name
+
+ # write whatever the user supplied into the project
+    copytree(local_path, ".", dirs_exist_ok=True)
+
+ tmp.cleanup()
diff --git a/ccds/hook_utils/dependencies.py b/ccds/hook_utils/dependencies.py
new file mode 100644
index 000000000..e63f8f752
--- /dev/null
+++ b/ccds/hook_utils/dependencies.py
@@ -0,0 +1,65 @@
+packages = [
+ "black",
+ "flake8",
+ "isort",
+ "pip",
+ "python-dotenv",
+]
+
+basic = [
+ "ipython",
+ "jupyterlab",
+ "matplotlib",
+ "notebook",
+ "numpy",
+ "pandas",
+ "scikit-learn",
+]
+
+scaffold = [
+ "typer",
+ "loguru",
+ "tqdm",
+]
+
+
+def write_dependencies(
+ dependencies, packages, pip_only_packages, repo_name, module_name, python_version
+):
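+    """Write the chosen dependency file (requirements.txt, environment.yml,
+    or Pipfile) for the generated project, listing `packages`."""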
+ if dependencies == "requirements.txt":
+ with open(dependencies, "w") as f:
+ lines = sorted(packages)
+
+ lines += ["" "-e ."]
+
+ f.write("\n".join(lines))
+ f.write("\n")
+
+ elif dependencies == "environment.yml":
+ with open(dependencies, "w") as f:
+ lines = [
+ f"name: {repo_name}",
+ "channels:",
+ " - conda-forge",
+ "dependencies:",
+ ]
+
+ lines += [f" - python={python_version}"]
+ lines += [f" - {p}" for p in packages if p not in pip_only_packages]
+
+ lines += [" - pip:"]
+ lines += [f" - {p}" for p in packages if p in pip_only_packages]
+ lines += [" - -e ."]
+
+ f.write("\n".join(lines))
+
+ elif dependencies == "Pipfile":
+ with open(dependencies, "w") as f:
+ lines = ["[packages]"]
+ lines += [f'{p} = "*"' for p in sorted(packages)]
+
+ lines += [f'"{module_name}" ={{editable = true, path = "."}}']
+
+ lines += ["", "[requires]", f'python_version = "{python_version}"']
+
+ f.write("\n".join(lines))
diff --git a/ccds/monkey_patch.py b/ccds/monkey_patch.py
new file mode 100644
index 000000000..b68c8d22b
--- /dev/null
+++ b/ccds/monkey_patch.py
@@ -0,0 +1,130 @@
+from collections import OrderedDict
+from pathlib import Path
+
+from cookiecutter.environment import StrictEnvironment
+from cookiecutter.exceptions import UndefinedVariableInTemplate
+from cookiecutter.generate import generate_context
+from cookiecutter.prompt import (
+    prompt_choice_for_config,
+    read_user_choice,
+    read_user_dict,
+    read_user_variable,
+    render_variable,
+)
+from jinja2.exceptions import UndefinedError
+
+
+def _prompt_choice_and_subitems(cookiecutter_dict, env, key, options, no_input):
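+    """Prompt the user to pick one of `options`, then prompt for any subfields
+    declared under the selected choice (e.g. a bucket name for cloud storage)."""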
+ result = {}
+
+ # first, get the selection
+ rendered_options = [
+ render_variable(env, list(raw.keys())[0], cookiecutter_dict) for raw in options
+ ]
+
+ if no_input:
+ selected = rendered_options[0]
+ else:
+ selected = read_user_choice(key, rendered_options)
+
+ selected_item = [
+ list(c.values())[0] for c in options if list(c.keys())[0] == selected
+ ][0]
+
+ result[selected] = {}
+
+ # then, fill in the sub values for that item
+ if isinstance(selected_item, dict):
+ for subkey, raw in selected_item.items():
+ # We are dealing with a regular variable
+ val = render_variable(env, raw, cookiecutter_dict)
+
+ if not no_input:
+ val = read_user_variable(subkey, val)
+
+ result[selected][subkey] = val
+ elif isinstance(selected_item, list):
+ val = prompt_choice_for_config(
+ cookiecutter_dict, env, selected, selected_item, no_input
+ )
+ result[selected] = val
+ elif isinstance(selected_item, str):
+ result[selected] = selected_item
+
+ return result
+
+
+def prompt_for_config(context, no_input=False):
+ """
+ Prompts the user to enter new config, using context as a source for the
+ field names and sample values.
+ :param no_input: Prompt the user at command line for manual configuration?
+ """
+ cookiecutter_dict = OrderedDict([])
+ env = StrictEnvironment(context=context)
+
+ # First pass: Handle simple and raw variables, plus choices.
+    # These must be done first because the dictionaries' keys and
+ # values might refer to them.
+ for key, raw in context["cookiecutter"].items():
+ if key.startswith("_"):
+ cookiecutter_dict[key] = raw
+ continue
+
+ try:
+ if isinstance(raw, list):
+ if isinstance(raw[0], dict):
+ val = _prompt_choice_and_subitems(
+ cookiecutter_dict, env, key, raw, no_input
+ )
+ cookiecutter_dict[key] = val
+ else:
+ # We are dealing with a choice variable
+ val = prompt_choice_for_config(
+ cookiecutter_dict, env, key, raw, no_input
+ )
+ cookiecutter_dict[key] = val
+ elif not isinstance(raw, dict):
+ # We are dealing with a regular variable
+ val = render_variable(env, raw, cookiecutter_dict)
+
+ if not no_input:
+ val = read_user_variable(key, val)
+
+ cookiecutter_dict[key] = val
+ except UndefinedError as err:
+ msg = "Unable to render variable '{}'".format(key)
+ raise UndefinedVariableInTemplate(msg, err, context)
+
+ # Second pass; handle the dictionaries.
+ for key, raw in context["cookiecutter"].items():
+ try:
+ if isinstance(raw, dict):
+ # We are dealing with a dict variable
+ val = render_variable(env, raw, cookiecutter_dict)
+
+ if not no_input:
+                    val = read_user_dict(key, val)
+
+ cookiecutter_dict[key] = val
+ except UndefinedError as err:
+ msg = "Unable to render variable '{}'".format(key)
+ raise UndefinedVariableInTemplate(msg, err, context)
+
+ return cookiecutter_dict
+
+
+def generate_context_wrapper(*args, **kwargs):
+ """Hardcoded in cookiecutter, so we override:
+ https://github.com/cookiecutter/cookiecutter/blob/2bd62c67ec3e52b8e537d5346fd96ebd82803efe/cookiecutter/main.py#L85
+ """
+ # replace full path to cookiecutter.json with full path to ccds.json
+ kwargs["context_file"] = str(Path(kwargs["context_file"]).with_name("ccds.json"))
+
+ parsed_context = generate_context(*args, **kwargs)
+
+ # replace key
+ parsed_context["cookiecutter"] = parsed_context["ccds"]
+ del parsed_context["ccds"]
+ return parsed_context
diff --git a/cookiecutter.json b/cookiecutter.json
index 161f59e46..9e1a294af 100644
--- a/cookiecutter.json
+++ b/cookiecutter.json
@@ -1,10 +1,3 @@
{
- "project_name": "project_name",
- "repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
- "author_name": "Your name (or your organization/company/team)",
- "description": "A short description of the project.",
- "open_source_license": ["MIT", "BSD-3-Clause", "No license file"],
- "s3_bucket": "[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')",
- "aws_profile": "default",
- "python_interpreter": ["python3", "python"]
+ "DEPRECATED": "Use of the `cookiecutter` command is deprecated. Please use `ccds` in place of `cookiecutter`. To continue using the deprecated template, use `cookiecutter ... -c v1`."
}
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 000000000..5cab2fb43
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1,17 @@
+-e .
+
+ansi2html
+black
+chardet
+flake8
+isort
+mkdocs
+mkdocs-material
+mkdocs-gen-files
+mkdocs-include-markdown-plugin
+pexpect
+pipenv
+pytest
+termynal
+virtualenvwrapper; sys_platform != 'win32'
+virtualenvwrapper-win; sys_platform == 'win32'
diff --git a/docs/README.md b/docs/README.md
index 4b4075dfe..a91d63132 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,15 +1,15 @@
Generating the docs
----------
-Install requirements:
+Install development requirements:
- pip install -r requirements.txt
+ pip install -r dev-requirements.txt
Change directories into the docs folder:
cd docs
-Use [mkdocs](http://www.mkdocs.org/) structure to update the documentation. Test locally with:
+Use [mkdocs](https://www.mkdocs.org/) structure to update the documentation. Test locally with:
mkdocs serve
diff --git a/docs/docs/all-options.md b/docs/docs/all-options.md
new file mode 100644
index 000000000..c7295f7d2
--- /dev/null
+++ b/docs/docs/all-options.md
@@ -0,0 +1,6 @@
+# Commandline options
+
+CCDS provides a number of choices that you can use to customize your project. The defaults work well for many projects, but several alternative tools are supported. Here are the available options:
+
+
+
\ No newline at end of file
diff --git a/docs/docs/ccds.png b/docs/docs/ccds.png
new file mode 100644
index 000000000..dfdc52ac8
Binary files /dev/null and b/docs/docs/ccds.png differ
diff --git a/docs/docs/contributing.md b/docs/docs/contributing.md
new file mode 100644
index 000000000..b23e74ada
--- /dev/null
+++ b/docs/docs/contributing.md
@@ -0,0 +1,5 @@
+# Contributing
+
+The Cookiecutter Data Science project is opinionated, but not afraid to be wrong. Best practices change, tools evolve, and lessons are learned. **The goal of this project is to make it easier to start, structure, and share an analysis.** [Pull requests](https://github.com/drivendata/cookiecutter-data-science/pulls) and [filing issues](https://github.com/drivendata/cookiecutter-data-science/issues) are encouraged. We'd love to hear what works for you, and what doesn't.
+
+If you use the Cookiecutter Data Science project, link back to this page or [give us a holler](https://twitter.com/drivendataorg) and [let us know](mailto:info@drivendata.org)!
diff --git a/docs/docs/css/extra.css b/docs/docs/css/extra.css
index d94b922e1..f7174c83b 100644
--- a/docs/docs/css/extra.css
+++ b/docs/docs/css/extra.css
@@ -1,3 +1,125 @@
-h1, h2, h3 {
- margin-top: 77px;
+:root {
+ --md-primary-fg-color: #328F97;
+ --md-primary-fg-color--light: #328F97;
+ --md-primary-fg-color--dark: #328F97;
+
+ --md-accent-fg-color: #328F97;
+
+ --md-footer-bg-color: white;
+ --md-footer-fg-color: #222;
+ --md-footer-fg-color--light: #222;
+ --md-footer-fg-color--lighter: #222;
}
+
+.md-typeset {
+ -webkit-print-color-adjust: exact;
+ color-adjust: exact;
+ font-size: 0.85rem;
+ line-height: 1.4;
+}
+
+.md-typeset h1 {
+ font-weight: 800;
+ color: #222;
+ margin-bottom: -0.25rem;
+}
+
+.md-typeset h2,
+.md-typeset h3 {
+ font-weight: 600;
+ color: #222;
+}
+
+.md-typeset a {
+ color: #297c82;
+ word-break: break-word;
+}
+
+.md-typeset pre {
+ display: flex;
+}
+
+.md-typeset code {
+ font-size: .8em;
+ background-color: #f5f5f5;
+ color: #193d3d;
+ min-width: 35rem;
+ flex-shrink: 1;
+}
+
+.js .md-typeset .tabbed-labels {
+ max-width: 35rem;
+}
+
+.md-typeset .admonition.info,
+.md-typeset details.info,
+.md-typeset .admonition.note,
+.md-typeset details.note {
+ border-color: #328F97;
+}
+.md-typeset .info > .admonition-title,
+.md-typeset .info > summary,
+.md-typeset .note > .admonition-title,
+.md-typeset .note > summary {
+ background-color: #328F9726;
+ border-color: #328F97;
+}
+
+.md-typeset .info > .admonition-title::before,
+.md-typeset .info > summary::before,
+.md-typeset .note > .admonition-title::before,
+.md-typeset .note > summary::before {
+ background-color: #328F97;
+}
+
+.md-header__title {
+ font-family: "Space Mono";
+ font-weight: 400;
+ font-style: normal;
+ font-size: 0.9rem;
+}
+
+.md-typeset > h1, .md-typeset > h2, .md-typeset > h3, .md-typeset > p, .md-typeset > ul, .md-typeset > ol, .md-typeset > blockquote, .md-typeset > div.admonition {
+ max-width: 35rem;
+}
+
+.md-typeset blockquote {
+ font-size: 1.0rem;
+ font-weight: 300;
+}
+
+#termynal {
+ /* 40 lines of 2ex */
+ height: 80ex !important;
+ min-height: 80ex !important;
+ max-height: 80ex !important;
+ overflow: scroll !important;
+ font-size: 1.5ex !important;
+}
+
+[data-ty] {
+ line-height: 2ex !important;
+ white-space: pre;
+}
+
+.newline {
+ line-height: 0 !important;
+}
+
+.inline-input,
+.default-text {
+ display: inline-block !important;
+}
+
+.md-logo img {
+ height: 3rem !important;
+}
+
+.md-header, .md-footer, .md-footer-meta {
+ color: #222;
+ background-color: white;
+}
+
+.md-nav__link--active {
+ font-weight: 600;
+}
\ No newline at end of file
diff --git a/docs/docs/favicon.ico b/docs/docs/favicon.ico
index eb9c2a1ab..418214bb9 100644
Binary files a/docs/docs/favicon.ico and b/docs/docs/favicon.ico differ
diff --git a/docs/docs/index.md b/docs/docs/index.md
index 9eded0971..b7b1de9ee 100644
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@@ -1,92 +1,102 @@
# Cookiecutter Data Science
-_A logical, reasonably standardized, but flexible project structure for doing and sharing data science work._
+_A logical, flexible, and reasonably standardized project structure for doing and sharing data science work._
-## Why use this project structure?
+
+
+
-> We're not talking about bikeshedding the indentation aesthetics or pedantic formatting standards — ultimately, data science code quality is about correctness and reproducibility.
+!!! info "CCDS V2 Announcement"
-When we think about data analysis, we often think just about the resulting reports, insights, or visualizations. While these end products are generally the main event, it's easy to focus on making the products _look nice_ and ignore the _quality of the code that generates them_. Because these end products are created programmatically, **code quality is still important**! And we're not talking about bikeshedding the indentation aesthetics or pedantic formatting standards — ultimately, data science code quality is about correctness and reproducibility.
+    Version 2 of Cookiecutter Data Science has recently launched. To learn more about what's different and what's in progress, see the [announcement blog post](https://drivendata.co/blog/ccds-v2).
-It's no secret that good analyses are often the result of very scattershot and serendipitous explorations. Tentative experiments and rapidly testing approaches that might not work out are all part of the process for getting to the good stuff, and there is no magic bullet to turn data exploration into a simple, linear progression.
-That being said, once started it is not a process that lends itself to thinking carefully about the structure of your code or project layout, so it's best to start with a clean, logical structure and stick to it throughout. We think it's a pretty big win all around to use a fairly standardized setup like this one. Here's why:
+## Quickstart
+Cookiecutter Data Science v2 requires Python 3.8+. Since this is a cross-project utility application, we recommend installing it with [pipx](https://pypa.github.io/pipx/). Installation command options:
-### Other people will thank you
+=== "With pipx (recommended)"
-> Nobody sits around before creating a new Rails project to figure out where they want to put their views; they just run `rails new` to get a standard project skeleton like everybody else.
+ ```bash
+ pipx install cookiecutter-data-science
-A well-defined, standard project structure means that a newcomer can begin to understand an analysis without digging in to extensive documentation. It also means that they don't necessarily have to read 100% of the code before knowing where to look for very specific things.
+ # From the parent directory where you want your project
+ ccds
+ ```
-Well organized code tends to be self-documenting in that the organization itself provides context for your code without much overhead. People will thank you for this because they can:
+=== "With pip"
- - Collaborate more easily with you on this analysis
- - Learn from your analysis about the process and the domain
- - Feel confident in the conclusions at which the analysis arrives
+ ```bash
+ pip install cookiecutter-data-science
+
+ # From the parent directory where you want your project
+ ccds
+ ```
-A good example of this can be found in any of the major web development frameworks like Django or Ruby on Rails. Nobody sits around before creating a new Rails project to figure out where they want to put their views; they just run `rails new` to get a standard project skeleton like everybody else. Because that default project structure is _logical_ and _reasonably standard across most projects_, it is much easier for somebody who has never seen a particular project to figure out where they would find the various moving parts.
+=== "With conda (coming soon!)"
-Another great example is the [Filesystem Hierarchy Standard](https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard) for Unix-like systems. The `/etc` directory has a very specific purpose, as does the `/tmp` folder, and everybody (more or less) agrees to honor that social contract. That means a Red Hat user and an Ubuntu user both know roughly where to look for certain types of files, even when using each other's system — or any other standards-compliant system for that matter!
+ ```bash
+ # conda install cookiecutter-data-science -c conda-forge
-Ideally, that's how it should be when a colleague opens up your data science project.
+ # From the parent directory where you want your project
+ # ccds
+ ```
-### You will thank you
+=== "Use the v1 template"
-Ever tried to reproduce an analysis that you did a few months ago or even a few years ago? You may have written the code, but it's now impossible to decipher whether you should use `make_figures.py.old`, `make_figures_working.py` or `new_make_figures01.py` to get things done. Here are some questions we've learned to ask with a sense of existential dread:
+ ```bash
+ pip install cookiecutter
-* Are we supposed to go in and join the column X to the data before we get started or did that come from one of the notebooks?
-* Come to think of it, which notebook do we have to run first before running the plotting code: was it "process data" or "clean data"?
-* Where did the shapefiles get downloaded from for the geographic plots?
-* _Et cetera, times infinity._
+ # From the parent directory where you want your project
+ cookiecutter https://github.com/drivendata/cookiecutter-data-science -c v1
+ ```
-These types of questions are painful and are symptoms of a disorganized project. A good project structure encourages practices that make it easier to come back to old work, for example separation of concerns, abstracting analysis as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph), and engineering best practices like version control.
+!!! info "Use the ccds command-line tool"
-### Nothing here is binding
+ Cookiecutter Data Science v2 now requires installing the new `cookiecutter-data-science` Python package, which extends the functionality of the [`cookiecutter`](https://cookiecutter.readthedocs.io/en/stable/README.html) templating utility. Use the provided `ccds` command-line program instead of `cookiecutter`.
-> "A foolish consistency is the hobgoblin of little minds" — Ralph Waldo Emerson (and [PEP 8!](https://www.python.org/dev/peps/pep-0008/#a-foolish-consistency-is-the-hobgoblin-of-little-minds))
-Disagree with a couple of the default folder names? Working on a project that's a little nonstandard and doesn't exactly fit with the current structure? Prefer to use a different package than one of the (few) defaults?
+## Starting a new project
-**Go for it!** This is a lightweight structure, and is intended to be a good _starting point_ for many projects. Or, as PEP 8 put it:
+Starting a new project is as easy as running this command at the command line. No need to create a directory first; the cookiecutter will do it for you.
-> Consistency within a project is more important. Consistency within one module or function is the most important. ... However, know when to be inconsistent -- sometimes style guide recommendations just aren't applicable. When in doubt, use your best judgment. Look at other examples and decide what looks best. And don't hesitate to ask!
+```bash
+ccds
+```
-## Getting started
+The `ccds` command-line tool defaults to the Cookiecutter Data Science template, but you can pass your own template as the first argument if you want.
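+
+For example, a minimal sketch of pointing `ccds` at a custom template (the URL below is only a placeholder):
+
+```bash
+ccds https://github.com/your-org/your-custom-template
+```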
-With this in mind, we've created a data science cookiecutter template for projects in Python. Your analysis doesn't have to be in Python, but the template does provide some Python boilerplate that you'd want to remove (in the `src` folder for example, and the Sphinx documentation skeleton in `docs`).
-### Requirements
+## Example
- - Python 2.7 or 3.5
- - [cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: `pip install cookiecutter`
+
-### Starting a new project
+Now that you've got your project, you're ready to go! You should do the following:
-Starting a new project is as easy as running this command at the command line. No need to create a directory first, the cookiecutter will do it for you.
+ - **Check out the directory structure** below so you know what's in the project and how to use it.
+ - **Read the [opinions](opinions.md)** that are baked into the project so you understand best practices and the philosophy behind the project structure.
+ - **Read the [using the template](using-the-template.md) guide** to understand how to get started on a project that uses the template.
-```nohighlight
-cookiecutter https://github.com/drivendata/cookiecutter-data-science
-```
-### Example
+ Enjoy!
-
## Directory structure
-```nohighlight
-├── LICENSE
-├── Makefile <- Makefile with commands like `make data` or `make train`
+The directory structure of your new project will look something like this (depending on the settings that you choose):
+
+```
+├── LICENSE <- Open-source license if one is chosen
+├── Makefile <- Makefile with convenience commands like `make data` or `make train`
├── README.md <- The top-level README for developers using this project.
├── data
-│ ├── external <- Data from third party sources.
-│ ├── interim <- Intermediate data that has been transformed.
-│ ├── processed <- The final, canonical data sets for modeling.
-│ └── raw <- The original, immutable data dump.
+│ ├── external <- Data from third party sources.
+│ ├── interim <- Intermediate data that has been transformed.
+│ ├── processed <- The final, canonical data sets for modeling.
+│ └── raw <- The original, immutable data dump.
│
-├── docs <- A default Sphinx project; see sphinx-doc.org for details
+├── docs <- A default mkdocs project; see www.mkdocs.org for details
│
├── models <- Trained and serialized models, model predictions, or model summaries
│
@@ -94,153 +104,33 @@ cookiecutter https://github.com/drivendata/cookiecutter-data-science
│ the creator's initials, and a short `-` delimited description, e.g.
│ `1.0-jqp-initial-data-exploration`.
│
+├── pyproject.toml <- Project configuration file with package metadata for
+│ {{ cookiecutter.module_name }} and configuration for tools like black
+│
├── references <- Data dictionaries, manuals, and all other explanatory materials.
│
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
-│ └── figures <- Generated graphics and figures to be used in reporting
+│ └── figures <- Generated graphics and figures to be used in reporting
│
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
│
-├── setup.py <- Make this project pip installable with `pip install -e`
-├── src <- Source code for use in this project.
-│ ├── __init__.py <- Makes src a Python module
-│ │
-│ ├── data <- Scripts to download or generate data
-│ │ └── make_dataset.py
-│ │
-│ ├── features <- Scripts to turn raw data into features for modeling
-│ │ └── build_features.py
-│ │
-│ ├── models <- Scripts to train models and then use trained models to make
-│ │ │ predictions
-│ │ ├── predict_model.py
-│ │ └── train_model.py
-│ │
-│ └── visualization <- Scripts to create exploratory and results oriented visualizations
-│ └── visualize.py
+├── setup.cfg <- Configuration file for flake8
│
-└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
-```
-
-## Opinions
-
-There are some opinions implicit in the project structure that have grown out of our experience with what works and what doesn't when collaborating on data science projects. Some of the opinions are about workflows, and some of the opinions are about tools that make life easier. Here are some of the beliefs which this project is built on—if you've got thoughts, please [contribute or share them](#contributing).
-
-### Data is immutable
-
-Don't ever edit your raw data, especially not manually, and especially not in Excel. Don't overwrite your raw data. Don't save multiple versions of the raw data. Treat the data (and its format) as immutable. The code you write should move the raw data through a pipeline to your final analysis. You shouldn't have to run all of the steps every time you want to make a new figure (see [Analysis is a DAG](#analysis-is-a-dag)), but anyone should be able to reproduce the final products with only the code in `src` and the data in `data/raw`.
-
-Also, if data is immutable, it doesn't need source control in the same way that code does. Therefore, ***by default, the data folder is included in the `.gitignore` file.*** If you have a small amount of data that rarely changes, you may want to include the data in the repository. Github currently warns if files are over 50MB and rejects files over 100MB. Some other options for storing/syncing large data include [AWS S3](https://aws.amazon.com/s3/) with a syncing tool (e.g., [`s3cmd`](http://s3tools.org/s3cmd)), [Git Large File Storage](https://git-lfs.github.com/), [Git Annex](https://git-annex.branchable.com/), and [dat](http://dat-data.com/). Currently by default, we ask for an S3 bucket and use [AWS CLI](http://docs.aws.amazon.com/cli/latest/reference/s3/index.html) to sync data in the `data` folder with the server.
-
-### Notebooks are for exploration and communication
-
-Notebook packages like the [Jupyter notebook](http://jupyter.org/), [Beaker notebook](http://beakernotebook.com/), [Zeppelin](http://zeppelin-project.org/), and other literate programming tools are very effective for exploratory data analysis. However, these tools can be less effective for reproducing an analysis. When we use notebooks in our work, we often subdivide the `notebooks` folder. For example, `notebooks/exploratory` contains initial explorations, whereas `notebooks/reports` is more polished work that can be exported as html to the `reports` directory.
-
-Since notebooks are challenging objects for source control (e.g., diffs of the `json` are often not human-readable and merging is near impossible), we recommended not collaborating directly with others on Jupyter notebooks. There are two steps we recommend for using notebooks effectively:
-
- 1. Follow a naming convention that shows the owner and the order the analysis was done in. We use the format `--.ipynb` (e.g., `0.3-bull-visualize-distributions.ipynb`).
-
- 2. Refactor the good parts. Don't write code to do the same task in multiple notebooks. If it's a data preprocessing task, put it in the pipeline at `src/data/make_dataset.py` and load data from `data/interim`. If it's useful utility code, refactor it to `src`.
-
- Now by default we turn the project into a Python package (see the `setup.py` file). You can import your code and use it in notebooks with a cell like the following:
-
-```
-# OPTIONAL: Load the "autoreload" extension so that code can change
-%load_ext autoreload
-
-# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
-%autoreload 2
-
-from src.data import make_dataset
-```
-
-### Analysis is a directed acyclic graph ([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph))
-
-Often in an analysis you have long-running steps that preprocess data or train models. If these steps have been run already (and you have stored the output somewhere like the `data/interim` directory), you don't want to wait to rerun them every time. We prefer [`make`](https://www.gnu.org/software/make/) for managing steps that depend on each other, especially the long-running ones. Make is a common tool on Unix-based platforms (and [is available for Windows]()). Following the [`make` documentation](https://www.gnu.org/software/make/), [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions), and [portability guide](http://www.gnu.org/savannah-checkouts/gnu/autoconf/manual/autoconf-2.69/html_node/Portable-Make.html#Portable-Make) will help ensure your Makefiles work effectively across systems. Here are [some](http://zmjones.com/make/) [examples](http://blog.kaggle.com/2012/10/15/make-for-data-scientists/) to [get started](https://web.archive.org/web/20150206054212/http://www.bioinformaticszen.com/post/decomplected-workflows-makefiles/). A number of data folks use `make` as their tool of choice, including [Mike Bostock](https://bost.ocks.org/mike/make/).
-
-There are other tools for managing DAGs that are written in Python instead of a DSL (e.g., [Paver](http://paver.github.io/paver/#), [Luigi](http://luigi.readthedocs.org/en/stable/index.html), [Airflow](https://airflow.apache.org/index.html), [Snakemake](https://snakemake.readthedocs.io/en/stable/), [Ruffus](http://www.ruffus.org.uk/), or [Joblib](https://pythonhosted.org/joblib/memory.html)). Feel free to use these if they are more appropriate for your analysis.
-
-### Build from the environment up
-
-The first step in reproducing an analysis is always reproducing the computational environment it was run in. You need the same tools, the same libraries, and the same versions to make everything play nicely together.
-
-One effective approach to this is use [virtualenv](https://virtualenv.pypa.io/en/latest/) (we recommend [virtualenvwrapper](https://virtualenvwrapper.readthedocs.org/en/latest/) for managing virtualenvs). By listing all of your requirements in the repository (we include a `requirements.txt` file) you can easily track the packages needed to recreate the analysis. Here is a good workflow:
-
- 1. Run `mkvirtualenv` when creating a new project
- 2. `pip install` the packages that your analysis needs
- 3. Run `pip freeze > requirements.txt` to pin the exact package versions used to recreate the analysis
- 4. If you find you need to install another package, run `pip freeze > requirements.txt` again and commit the changes to version control.
-
-If you have more complex requirements for recreating your environment, consider a virtual machine based approach such as [Docker](https://www.docker.com/) or [Vagrant](https://www.vagrantup.com/). Both of these tools use text-based formats (Dockerfile and Vagrantfile respectively) you can easily add to source control to describe how to create a virtual machine with the requirements you need.
-
-### Keep secrets and configuration out of version control
-
-You _really_ don't want to leak your AWS secret key or Postgres username and password on Github. Enough said — see the [Twelve Factor App](http://12factor.net/config) principles on this point. Here's one way to do this:
-
-#### Store your secrets and config variables in a special file
-
-Create a `.env` file in the project root folder. Thanks to the `.gitignore`, this file should never get committed into the version control repository. Here's an example:
-
-```nohighlight
-# example .env file
-DATABASE_URL=postgres://username:password@localhost:5432/dbname
-AWS_ACCESS_KEY=myaccesskey
-AWS_SECRET_ACCESS_KEY=mysecretkey
-OTHER_VARIABLE=something
-```
-
-#### Use a package to load these variables automatically.
-
-If you look at the stub script in `src/data/make_dataset.py`, it uses a package called [python-dotenv](https://github.com/theskumar/python-dotenv) to load up all the entries in this file as environment variables so they are accessible with `os.environ.get`. Here's an example snippet adapted from the `python-dotenv` documentation:
-
-```python
-# src/data/dotenv_example.py
-import os
-from dotenv import load_dotenv, find_dotenv
-
-# find .env automagically by walking up directories until it's found
-dotenv_path = find_dotenv()
-
-# load up the entries as environment variables
-load_dotenv(dotenv_path)
-
-database_url = os.environ.get("DATABASE_URL")
-other_variable = os.environ.get("OTHER_VARIABLE")
+└── {{ cookiecutter.module_name }} <- Source code for use in this project.
+ │
+ ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module
+ │
+ ├── config.py <- Store useful variables and configuration
+ │
+ ├── dataset.py <- Scripts to download or generate data
+ │
+ ├── features.py <- Code to create features for modeling
+ │
+ ├── modeling
+ │ ├── __init__.py
+ │ ├── predict.py <- Code to run model inference with trained models
+ │ └── train.py <- Code to train models
+ │
+ └── plots.py <- Code to create visualizations
```
-
-#### AWS CLI configuration
-When using Amazon S3 to store data, a simple method of managing AWS access is to set your access keys to environment variables. However, managing mutiple sets of keys on a single machine (e.g. when working on multiple projects) it is best to use a [credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html), typically located in `~/.aws/credentials`. A typical file might look like:
-```
-[default]
-aws_access_key_id=myaccesskey
-aws_secret_access_key=mysecretkey
-
-[another_project]
-aws_access_key_id=myprojectaccesskey
-aws_secret_access_key=myprojectsecretkey
-```
-You can add the profile name when initialising a project; assuming no applicable environment variables are set, the profile credentials will be used by default.
-
-### Be conservative in changing the default folder structure
-
-To keep this structure broadly applicable for many different kinds of projects, we think the best approach is to be liberal in changing the folders around for _your_ project, but be conservative in changing the default structure for _all_ projects.
-
-We've created a folder-layout label specifically for issues proposing to add, subtract, rename, or move folders around. More generally, we've also created a needs-discussion label for issues that should have some careful discussion and broad support before being implemented.
-
-## Contributing
-
-The Cookiecutter Data Science project is opinionated, but not afraid to be wrong. Best practices change, tools evolve, and lessons are learned. **The goal of this project is to make it easier to start, structure, and share an analysis.** [Pull requests](https://github.com/drivendata/cookiecutter-data-science/pulls) and [filing issues](https://github.com/drivendata/cookiecutter-data-science/issues) is encouraged. We'd love to hear what works for you, and what doesn't.
-
-If you use the Cookiecutter Data Science project, link back to this page or [give us a holler](https://twitter.com/drivendataorg) and [let us know](mailto:info@drivendata.org)!
-
-## Links to related projects and references
-
-Project structure and reproducibility is talked about more in the R research community. Here are some projects and blog posts if you're working in R that may help you out.
-
- - [Project Template](http://projecttemplate.net/index.html) - An R data analysis template
- - "[Designing projects](http://nicercode.github.io/blog/2013-04-05-projects/)" on Nice R Code
- - "[My research workflow](http://www.carlboettiger.info/2012/05/06/research-workflow.html)" on Carlboettiger.info
- - "[A Quick Guide to Organizing Computational Biology Projects](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000424)" in PLOS Computational Biology
-
-Finally, a huge thanks to the [Cookiecutter](https://cookiecutter.readthedocs.org/en/latest/) project ([github](https://github.com/audreyr/cookiecutter)), which is helping us all spend less time thinking about and writing boilerplate and more time getting things done.
diff --git a/docs/docs/js/extra.js b/docs/docs/js/extra.js
new file mode 100644
index 000000000..d81f81110
--- /dev/null
+++ b/docs/docs/js/extra.js
@@ -0,0 +1,28 @@
+/* Smooth scrolling for termynal replay */
+
+function scrollToBottomOfContainer(container, element) {
+ var positionToScroll = element.offsetTop + element.offsetHeight - container.offsetHeight;
+ container.scrollTo({
+ top: positionToScroll,
+ behavior: 'smooth'
+ });
+}
+
+// Select the node that will be observed for mutations
+const targetNode = document.getElementById("termynal");
+
+// Options for the observer (which mutations to observe)
+const config = { attributes: false, childList: true, subtree: false };
+
+// Callback function to execute when mutations are observed
+const callback = (mutationList, observer) => {
+ for (const mutation of mutationList) {
+ scrollToBottomOfContainer(targetNode, mutation.target);
+ }
+};
+
+// Create an observer instance linked to the callback function
+const observer = new MutationObserver(callback);
+
+// Start observing the target node for configured mutations
+observer.observe(targetNode, config);
diff --git a/docs/docs/logo.svg b/docs/docs/logo.svg
new file mode 100644
index 000000000..8cdf10263
--- /dev/null
+++ b/docs/docs/logo.svg
@@ -0,0 +1,13 @@
+
+
\ No newline at end of file
diff --git a/docs/docs/opinions.md b/docs/docs/opinions.md
new file mode 100644
index 000000000..cd1be70be
--- /dev/null
+++ b/docs/docs/opinions.md
@@ -0,0 +1,161 @@
+# Opinions
+
+The default project structure reflects certain opinions about how to do collaborative data science work. These opinions grew out of our own experiences with what works and what doesn't. Some of these opinions are about workflows, and others are about tools that can make the process easier. These opinions are discussed below. If you have any thoughts, please [contribute or share them](contributing.md).
+
+## Data analysis is a directed acyclic graph
+
+_Don't ever edit your raw data. Especially not manually. And especially not in Excel._
+
+The most important features of a quality data analysis are correctness and reproducibility—anyone should be able to re-run your analysis using only your code and raw data and produce the same final products. The best way to ensure correctness is to test your analysis code. **The best way to ensure reproducibility is to treat your data analysis pipeline as a directed acyclic graph ([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph))**. This means each step of your analysis is a node in a directed graph with no loops. You can run through the graph forwards to recreate any analysis output, or you can trace backwards from an output to examine the combination of code and data that created it.
+
+### Raw data is immutable
+
+That proper data analysis is a DAG means that **raw data must be treated as immutable**—it's okay to read and copy raw data to manipulate it into new outputs, but never okay to change it in place. This informs the design of the default `data/` directory subfolders in which data originates from `raw/` and `external/`, intermediate analytical outputs get serialized or cached in `interim/`, and final products end up in `processed/` (the number or names of these folders are less important than the flow of data between them).
+
+Some **do**s and **don't**s that follow from treating data analysis as a DAG:
+
+* ✅ **Do** write code that moves the raw data through a pipeline to your final analysis.
+* ✅ **Do** serialize or cache the intermediate outputs of long-running steps.
+* ✅ **Do** make it possible (and ideally, documented and automated) for anyone to reproduce your final data products with only the code in `{{ cookiecutter.module_name }}` and the data in `data/raw/` (and `data/external/`).
+
+* ⛔ **Don't** _ever_ edit your raw data, especially not manually, and _especially_ not in Excel. This includes changing file formats or fixing errors that might break a tool that's trying to read your data file.
+* ⛔ **Don't** overwrite your raw data with a newly processed or cleaned version.
+* ⛔ **Don't** save multiple versions of the raw data.
+
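+To make this concrete, here is a minimal sketch of a single pipeline step that follows these rules (the file names and the cleaning logic are hypothetical):
+
+```python
+# a hypothetical cleaning step: read from data/raw, write to data/interim
+from pathlib import Path
+
+import pandas as pd
+
+RAW = Path("data/raw")
+INTERIM = Path("data/interim")
+
+
+def clean_survey_data() -> Path:
+    """Read the raw extract, drop unusable rows, and cache the result."""
+    df = pd.read_csv(RAW / "survey.csv")        # raw data is only ever read
+    df = df.dropna(subset=["respondent_id"])    # fixes live in code, not in the raw file
+    INTERIM.mkdir(parents=True, exist_ok=True)
+    out_path = INTERIM / "survey_clean.csv"
+    df.to_csv(out_path, index=False)            # new output; the raw file is untouched
+    return out_path
+
+
+if __name__ == "__main__":
+    clean_survey_data()
+```
+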
+### Data should (mostly) not be kept in source control
+
+Another consequence of treating data as immutable is that data doesn't need source control in the same way that code does. Therefore, **by default, the `data/` folder is included in the `.gitignore` file.** If you have a small amount of data that rarely changes, you _may_ want to include the data in the repository. GitHub [currently](https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-large-files-on-github#file-size-limits) warns you if files are over 50MB and rejects any files over 100MB.
+
+If you have larger amounts of data, consider storing and syncing with a cloud service like [Amazon S3](https://aws.amazon.com/s3/), [Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-overview), or [Google Cloud Storage](https://cloud.google.com/storage/docs/introduction). We've had a good experience with Amazon S3, if you're not tied to any particular cloud provider. Syncing tools can help you manage the data. Some examples:
+
+- Amazon S3: [`awscli`](https://aws.amazon.com/cli/), [`s3cmd`](https://s3tools.org/s3cmd), [`s5cmd`](https://github.com/peak/s5cmd), [`geesefs`](https://github.com/yandex-cloud/geesefs)
+- Azure Blob Storage: [`azcopy`](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10)
+- Google Cloud Platform: [`gcloud`](https://cloud.google.com/storage/docs/discover-object-storage-gcloud)
+- Supports multiple clouds: [`cloudpathlib`](https://github.com/drivendataorg/cloudpathlib), [`fsspec`](https://filesystem-spec.readthedocs.io/en/stable/)
+
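+As one concrete example of the tools above, here is a hedged sketch using [`cloudpathlib`](https://github.com/drivendataorg/cloudpathlib) to push and pull a data folder (the bucket name is a placeholder):
+
+```python
+from pathlib import Path
+
+from cloudpathlib import CloudPath
+
+# placeholder bucket; point this at your project's storage
+remote_raw = CloudPath("s3://my-project-bucket/data/raw")
+local_raw = Path("data/raw")
+
+# push the local raw data up to the bucket...
+remote_raw.upload_from(local_raw)
+
+# ...or pull it back down on a fresh clone of the project
+remote_raw.download_to(local_raw)
+```
+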
+There is also the [Git Large File Storage (LFS)](https://git-lfs.github.com/) extension which lets you track large files in git but stores the files on a separate server. GitHub provides [some storage compatible with Git LFS](https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-git-large-file-storage).
+
+### Tools for DAGs
+
+DAGs are so common in data and software processes that many tools have been built to manage them. We prefer [`make`](https://www.gnu.org/software/make/) for managing steps that depend on each other, especially the long-running ones. Make is a common tool on Unix-based platforms (and is available for Windows via [chocolatey](https://community.chocolatey.org/packages/make)). Following the [`make` documentation](https://www.gnu.org/software/make/), [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions), and [portability guide](https://www.gnu.org/savannah-checkouts/gnu/autoconf/manual/autoconf-2.69/html_node/Portable-Make.html#Portable-Make) will help ensure your Makefiles work effectively across systems. Here are [some](http://zmjones.com/make/) [examples](https://blog.kaggle.com/2012/10/15/make-for-data-scientists/) to [get started](https://web.archive.org/web/20150206054212/https://www.bioinformaticszen.com/post/decomplected-workflows-makefiles/). A number of data folks use `make` as their tool of choice, including [Mike Bostock](https://bost.ocks.org/mike/make/).
+
+There are other tools for managing DAGs that are written in Python instead of a DSL. Popular ones include [Airflow](https://airflow.apache.org/index.html), [Luigi](https://luigi.readthedocs.org/en/stable/index.html), [Snakemake](https://snakemake.readthedocs.io/en/stable/), [Prefect](https://github.com/PrefectHQ/prefect), [Dagster](https://github.com/dagster-io/dagster), and [Joblib](https://joblib.readthedocs.io/en/latest/memory.html). Feel free to use these if they are more appropriate for your analysis.
+
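+For example, Joblib's `Memory` gives you lightweight on-disk caching of long-running steps without a full workflow engine; a minimal sketch (the function and cache location here are hypothetical):
+
+```python
+from joblib import Memory
+
+# cache expensive results on disk so re-runs are cheap
+memory = Memory("data/interim/joblib_cache", verbose=0)
+
+
+@memory.cache
+def build_features(raw_csv_path: str) -> dict:
+    """Pretend this is a slow preprocessing step; results are cached by input arguments."""
+    # ... expensive work here ...
+    return {"n_rows": 1000}
+
+
+result = build_features("data/raw/survey.csv")  # slow the first time, cached afterwards
+```
+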
+## Notebooks are for exploration and communication, source files are for repetition
+
+> Source code is superior for replicability because it is more portable, can be tested more easily, and is easier to code review.
+
+[Jupyter Notebook](https://jupyter.org/), [Apache Zeppelin](https://zeppelin.apache.org/), and other literate programming tools are very effective for exploratory data analysis because they enable rapid iteration and visualization of results. However, these tools can be less effective for reproducing an analysis. Source code is superior for replicability because it is more portable, can be tested more easily, and is easier to code review.
+
+When we use notebooks in our work, we often subdivide the `notebooks/` folder to keep things organized and legible. For example, `notebooks/exploratory/` contains initial explorations, whereas `notebooks/reports/` is more polished work that can be exported as HTML to the `reports/` directory. We also recommend that you follow a naming convention that shows the owner and the order the analysis was done in. We use the format `<step>-<ghuser>-<description>.ipynb` (e.g., `0.3-bull-visualize-distributions.ipynb`). Since notebooks are challenging objects for source control (e.g., diffs of the `json` are often not human-readable and merging is near impossible), we recommend not collaborating directly with others on Jupyter notebooks. We also recommend using a tool like [`nbautoexport`](https://github.com/drivendataorg/nbautoexport) to make reviewing changes to notebooks easier.
+
+### Refactor the good parts into source code
+
+Don't write code to do the same task in multiple notebooks. If it's a data preprocessing task, put it in the pipeline at `{{ cookiecutter.module_name }}/data/make_dataset.py` and load data from `data/interim/`. If it's useful utility code, refactor it to `{{ cookiecutter.module_name }}`. Classic signs that you are ready to move from a notebook to source code include duplicating old notebooks to start new ones, copy/pasting functions between notebooks, and creating object-oriented classes within notebooks.
+
+We make it easy to refactor notebook code because the ccds template makes your project a Python package by default and installs it locally via the requirements file of your chosen environment manager. This enables you to import your project's source code and use it in notebooks with a cell like the following:
+
+```python
+# OPTIONAL: Load the "autoreload" extension so that code can change
+%load_ext autoreload
+
+# OPTIONAL: always reload modules so that as you change code
+# in {{ cookiecutter.module_name }}, it gets loaded
+%autoreload 2
+
+from {{ cookiecutter.module_name }}.data import make_dataset
+```
+
+## Keep your modeling organized
+
+Different modeling pipelines are different, so we don't provide a lot of baked-in structure to the `models/` directory. However, documenting modeling experiments is critical to enable reproducibility, continuous learning, and improvement. You should implement experiment documentation procedures that enable you to, at minimum, identify the provenance of the data and the version of the code that the experiment used, as well as the metrics used to measure performance.
+
+For smaller projects, it's fine to start with homegrown tracking using file formats like JSON that are both human- and machine-readable. You can graduate to experiment tracking tools (e.g., [MLflow](https://mlflow.org/)) if it's warranted or if they're standard for your team.
+
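+A hedged sketch of what such a homegrown record might look like (the fields and paths are placeholders to adapt to your project):
+
+```python
+import json
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+
+# capture enough to identify the code, data, and results of this run
+record = {
+    "run_at": datetime.now(timezone.utc).isoformat(),
+    "git_commit": subprocess.check_output(
+        ["git", "rev-parse", "HEAD"], text=True
+    ).strip(),
+    "train_data": "data/processed/train.csv",  # placeholder data path
+    "params": {"model": "random_forest", "n_estimators": 200},
+    "metrics": {"accuracy": 0.87},  # fill in with your measured values
+}
+
+log_path = Path("models/experiment_log.jsonl")
+log_path.parent.mkdir(exist_ok=True)
+with log_path.open("a") as f:
+    f.write(json.dumps(record) + "\n")  # one JSON record per line
+```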
+
+## Build from the environment up
+
+The first step in reproducing an analysis is always replicating the computational environment it was run in. You need the same tools, the same libraries, and the same versions to make everything play nicely together.
+
+Doing so in Python requires choosing and configuring an environment management tool. The ecosystem for this tooling has evolved a lot in recent years.
+
+For data science work, we prefer to use the **conda** package manager because it also manages non-Python packages, including system library dependencies that you often run into in data science. Our recommended way to install conda is with [Miniforge](https://github.com/conda-forge/miniforge), though the [Miniconda and Anaconda installers](https://docs.conda.io/projects/conda/en/stable/user-guide/install/index.html) from Anaconda are also popular.
+
+You can also use Python-only environment managers. Popular tools in this category include [virtualenv](https://virtualenv.pypa.io/en/latest/), [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/stable/), [Poetry](https://python-poetry.org/), [Pipenv](https://pipenv.pypa.io/en/latest/), and others.
+
+Cookiecutter Data Science v2 lets you pick from among many of these, or initialize your project without one so you can roll your own.
+
+If you have more complex requirements for recreating your environment, consider a virtual machine based approach such as [Docker](https://www.docker.com/) or [Vagrant](https://www.vagrantup.com/). Both of these tools use text-based formats (Dockerfile and Vagrantfile respectively) that you can easily add to source control to describe how to create a virtual machine with the requirements you need. You might also consider using [`pip-tools`](https://github.com/jazzband/pip-tools) or [`conda-lock`](https://github.com/conda/conda-lock) to generate a file that appropriately pins your dependencies.
+
+## Keep secrets and configuration out of version control
+
+You _really_ don't want to leak your AWS secret key or Postgres username and password on GitHub. See the [Twelve Factor App](https://12factor.net/config) principles on this point. Here's one way to do this:
+
+### Store your secrets and config variables in a special file
+
+Create a `.env` file in the project root folder. Thanks to the `.gitignore`, this file should never get committed into the version control repository. Here's an example:
+
+```nohighlight
+# example .env file
+DATABASE_URL=postgres://username:password@localhost:5432/dbname
+AWS_ACCESS_KEY=myaccesskey
+AWS_SECRET_ACCESS_KEY=mysecretkey
+OTHER_VARIABLE=something
+```
+
+### Use a package to load these variables automatically
+
+If you look at the stub script in `{{ cookiecutter.module_name }}/data/make_dataset.py`, it uses a package called [python-dotenv](https://github.com/theskumar/python-dotenv) to load up all the entries in this file as environment variables so they are accessible with `os.environ.get`. Here's an example snippet adapted from the `python-dotenv` documentation:
+
+```python
+# {{ cookiecutter.module_name }}/data/dotenv_example.py
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# find .env automagically by walking up directories until it's found
+dotenv_path = find_dotenv()
+
+# load up the entries as environment variables
+load_dotenv(dotenv_path)
+
+database_url = os.environ.get("DATABASE_URL")
+other_variable = os.environ.get("OTHER_VARIABLE")
+```
+
+### AWS CLI configuration
+
+When using Amazon S3 to store data, a simple method of managing AWS access is to set your access keys to environment variables. However, when managing multiple sets of keys on a single machine (e.g., when working on multiple projects), it is best to use a [credentials file](https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html), typically located in `~/.aws/credentials`. A typical file might look like:
+
+```
+[default]
+aws_access_key_id=myaccesskey
+aws_secret_access_key=mysecretkey
+
+[another_project]
+aws_access_key_id=myprojectaccesskey
+aws_secret_access_key=myprojectsecretkey
+```
+
+You can add the profile name when initializing a project; assuming no applicable environment variables are set, the profile credentials will be used by default.
+
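+If you access the bucket from Python code rather than the CLI, the same profiles work there too. A small illustration with `boto3` (not installed by the template; the bucket and key names are placeholders):
+
+```python
+import boto3
+
+# use the named profile from ~/.aws/credentials instead of environment variables
+session = boto3.Session(profile_name="another_project")
+s3 = session.client("s3")
+
+# download one object from the project bucket into data/raw
+s3.download_file("my-project-bucket", "raw/example.csv", "data/raw/example.csv")
+```
+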
+## Encourage adaptation from a consistent default
+
+To keep this structure broadly applicable for many different kinds of projects, we think the best approach is to be liberal in changing the folders around for _your_ project, but be conservative in modifying the default cookiecutter structure for _all_ projects.
+
+We've created a folder-layout label specifically for issues proposing to add, subtract, rename, or move folders around. More generally, we've also created a needs-discussion label for issues that should have some careful discussion and broad support before being implemented.
+
+### Examples of template adaptation and evolution
+
+A project's organizational needs may differ from the start and can change over time. Here are some examples of directions to go in evolving your project structure.
+
+#### Example 1: Simplifying
+
+Some projects don't require multiple sub-directories to organize their module code. When a few Python files can effectively accomplish all that is required, flattening folders into files can make things easier to track and maintain. You can see an example of this in our [cyfi package](https://github.com/drivendataorg/cyfi/tree/main/cyfi). If it's in the template but you don't need it, delete it!
+
+#### Example 2: Expanding
+
+By contrast, we've added more folders to organize module code on more complex projects. A good example of this is our [zamba package](https://github.com/drivendataorg/zamba/tree/master/zamba) for which we've introduced new folders to handle task-specific portions of the codebase.
+
+#### Example 3: Re-organizing
+
+On long-running projects, the `notebooks` folder can get congested. One adaptation we've employed is to add a top-level `research/` folder (and a corresponding `data/research` data folder) that contains sub-folders for individual experiments. These sub-folders can contain their own notebooks, code, and even their own Makefiles that inherit from the parent project `Makefile`.
+
diff --git a/docs/docs/related.md b/docs/docs/related.md
new file mode 100644
index 000000000..9a79b1cf3
--- /dev/null
+++ b/docs/docs/related.md
@@ -0,0 +1,10 @@
+# Links to related projects and references
+
+Project structure and reproducibility are discussed more in the R research community. Here are some projects and blog posts that may help you out if you're working in R.
+
+ - [Project Template](https://projecttemplate.net/index.html) - An R data analysis template
+ - "[Designing projects](https://nicercode.github.io/blog/2013-04-05-projects/)" on Nice R Code
+ - "[My research workflow](https://www.carlboettiger.info/2012/05/06/research-workflow.html)" on Carlboettiger.info
+ - "[A Quick Guide to Organizing Computational Biology Projects](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000424)" in PLOS Computational Biology
+
+Finally, a huge thanks to the [Cookiecutter](https://cookiecutter.readthedocs.org/en/latest/) project ([github](https://github.com/audreyr/cookiecutter)), which is helping us all spend less time thinking about and writing boilerplate and more time getting things done.
\ No newline at end of file
diff --git a/docs/docs/using-the-template.md b/docs/docs/using-the-template.md
new file mode 100644
index 000000000..f2518d786
--- /dev/null
+++ b/docs/docs/using-the-template.md
@@ -0,0 +1,177 @@
+# Using the template
+
+You've [created](index.md#starting-a-new-project) your project. You've [read the opinions section](opinions.md). You're ready to start doing some work.
+
+Here's a quick guide to the kinds of things we do once our project is ready to go. We'll walk through this example using git and GitHub for version control and Jupyter notebooks for exploration, but you can use whatever tools you like.
+
+## Set up version control
+
+Often, we start by initializing a `git` repository to track the code we write in version control and collaborate with teammates. At the command line, the following commands turn the folder into a git repository, add all of the files and folders created by CCDS into source control (except for what is in the `.gitignore` file), and then make a commit to the repository:
+
+```bash
+# From inside your newly created project directory
+git init
+git add .
+git commit -m "CCDS defaults"
+```
+
+We usually commit the entire default CCDS structure so it is easy to track the changes we make to the structure in version history.
+
+Now that the default layout is committed, you should push it to a shared repository. You can do this through the interface of whatever source control platform you use. This may be GitHub, GitLab, Bitbucket, or something else.
+
+If you use GitHub and have the [gh CLI tool](https://cli.github.com/), you can easily create a new repository for the project from the command line.
+
+```bash
+gh repo create
+```
+
+You'll be asked a series of questions to set up the repository on GitHub. Once you're done you'll be able to push the changes in your local repository to GitHub.
+
+## Make as a task runner
+
+[GNU Make](https://www.gnu.org/software/make/) is a tool that is typically pre-installed on Linux and macOS systems, and we use it as a "task runner" for CCDS projects. This means that when we have a series of shell commands we might want to run, such as setting up a virtual environment or syncing the data to cloud storage, we set them up as recipes in the `Makefile`. To use a recipe, simply call
+
+```bash
+make RECIPE_NAME
+```
+
+where `RECIPE_NAME` is the name of a recipe, like `requirements` or `sync_data_up`. Projects created by CCDS include a `Makefile` with several recipes we've predefined. You'll see them referenced in the sections below. To see a list of all available commands, just call
+
+```bash
+make
+```
+
+on its own.
+
+!!! note "Using Windows?"
+
+ If you are using Windows, you may need to install Make. See the ["Installing Make on Windows"](#installing-make-on-windows) section below.
+
+## Create a Python virtual environment
+
+We often use Python for our data science projects. We use a virtual environment to manage the packages we use in our project. This is a way to keep the packages we use in our project separate from the packages we use in other projects. This is especially important when we are working on multiple projects at the same time.
+
+Cookiecutter Data Science supports [a few options](opinions.md#build-from-the-environment-up) for Python virtual environment management, but no matter which you choose, you can create an environment with the following command:
+
+```bash
+make create_environment
+```
+
+Once the environment is created, you'll want to make sure to activate it. You'll have to do this following the instructions for your specific environment manager. We recommend using a shell prompt that shows you which environment you are in, so you can easily tell if you are in the right environment, for example [starship](https://starship.rs/). You can also use the command `which python` to make sure that your shell is pointing to the version of Python associated with your virtual environment.
+
+Once you are sure that your environment is activated in your shell, you can install the packages you need for your project. You can do this with the following command:
+
+```bash
+make requirements
+```
+
+## Add your data
+
+There's no universal advice for how to manage your data, but here are some recommendations for starting points depending on where the data comes from:
+
+ - **Flat files (e.g., CSVs or spreadsheets) that are static** - Put these files into your `data/raw` folder and then run `make sync_data_up` to push the raw data to your cloud provider.
+ - **Flat files that change and are extracted from somewhere** - Add a Python script to your source module in `data/make_dataset.py` that downloads the data and puts it in the `data/raw` folder (a minimal sketch follows this list). Then you can use this to get the latest data and push it up to your cloud host as it changes (be careful not to [overwrite your raw data](opinions.md/#data-analysis-is-a-directed-acyclic-graph)).
+ - **Databases you connect to with credentials** - Store your credentials in `.env`. We recommend adding a `db.py` file or similar to your `data` module that connects to the database and pulls data. If your queries generally fit into memory, you can just have functions in the `db.py` to load data that you use in analysis. If not, you'll want to add a script like above to download the data to the `data/raw` folder.
+
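+Here is a minimal sketch of the kind of download script mentioned above (the URL and file names are placeholders):
+
+```python
+# hypothetical download step for your module's data/make_dataset.py
+from pathlib import Path
+from urllib.request import urlretrieve
+
+RAW_DIR = Path("data/raw")
+SOURCE_URL = "https://example.com/source-data.csv"  # placeholder source
+
+
+def download_raw_data() -> Path:
+    """Fetch the latest extract into data/raw without modifying anything already there."""
+    RAW_DIR.mkdir(parents=True, exist_ok=True)
+    destination = RAW_DIR / "source-data.csv"
+    urlretrieve(SOURCE_URL, destination)
+    return destination
+
+
+if __name__ == "__main__":
+    download_raw_data()
+```
+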
+## Check out a branch
+
+We'll talk about code review later, but it's a good practice to use feature branches and pull requests to keep your development organized. Now that you have source control configured, you can check out a branch to work with:
+
+```
+git checkout -b initial-exploration
+```
+
+## Open a notebook
+
+!!! note
+
+ The following assumes you're using a Jupyter notebook, but while the specific commands for another notebook tool may look a little bit different, the process guidance still applies.
+
+Now you're ready to do some analysis! Make sure that your project-specific environment is activated (you can check with `which jupyter`) and run `jupyter notebook notebooks` to open a Jupyter notebook in the `notebooks/` folder. You can start by creating a new notebook and doing some exploratory data analysis. We often name notebooks with a scheme that looks like this:
+
+```
+0.01-pjb-data-source-1.ipynb
+```
+
+ - `0.01` - Helps keep work in chronological order. The structure is `PHASE.NOTEBOOK`. `NOTEBOOK` is just the Nth notebook in that phase to be created. For phases of the project, we generally use a scheme like the following, but you are welcome to design your own conventions:
+ - `0` - Data exploration - often just for exploratory work
+ - `1` - Data cleaning and feature creation - often writes data to `data/processed` or `data/interim`
+ - `2` - Visualizations - often writes publication-ready viz to `reports`
+ - `3` - Modeling - training machine learning models
+ - `4` - Publication - Notebooks that get turned directly into reports
+ - `pjb` - Your initials; this is helpful for knowing who created the notebook and prevents collisions from people working in the same notebook.
+ - `data-source-1` - A description of what the notebook covers
+
+Now that you have your notebook going, start your analysis!
+
+## Refactoring code into shared modules
+
+As your project goes on, you'll want to refactor your code in a way that makes it easy to share between notebooks and scripts. We recommend creating a module in the `{{ cookiecutter.module_name }}` folder that contains the code you use in your project. This is a good way to make sure that you can use the same code in multiple places without having to copy and paste it.
+
+Because the default structure is a Python package and is installed by default, you can do the following to make that code available to you within a Jupyter notebook.
+
+First, we recommend turning on the `autoreload` extension. This will make Jupyter always go back to the source code for the module rather than caching it in memory. If your notebook isn't reflecting your latest changes to a `.py` file, try restarting the kernel and make sure `autoreload` is on. We add a cell at the top of the notebook with the following:
+
+```
+%load_ext autoreload
+%autoreload 2
+```
+
+Now all your code should be importable. At the start of the CCDS project, you picked a module name. It's the same name as the folder that is in the root project directory. For example, if the module name were `my_project` you could use code by importing it like:
+
+```python
+from my_project.data import make_dataset
+
+data = make_dataset()
+```
+
+Now it should be easy to do any refactoring you need to do to make your code more modular and reusable.
+
+
+## Make your code reviewable
+
+We try to review every line of code written at DrivenData. Data science code in particular has the risk of executing without erroring, but not being "correct" (for example, you use standard deviation in a calculation rather than variance). We've found the best way to catch these kinds of mistakes is a second set of eyes looking at the code.
+
+Right now on GitHub, it is hard to observe and comment on changes that happen in Jupyter notebooks. We develop and maintain a tool called [`nbautoexport`](https://nbautoexport.drivendata.org/stable/) that automatically exports a `.py` version of your Jupyter notebook every time you save it. This means that you can commit both the `.ipynb` and the `.py` to source control so that reviewers can leave line-by-line comments on your notebook code. To use it, you will need to add `nbautoexport` to your requirements file and then run `make requirements` to install it.
+
+Once `nbautoexport` is installed, you can set up the nbautoexport tool for your project with the following commands at the command line:
+
+```
+nbautoexport install
+nbautoexport configure notebooks
+```
+
+Once you're done with your work, you'll want to add it to a commit and push it to GitHub so you can open a pull request. You can do that with the following commands at the command line:
+
+```
+git add . # stage all changed files to include them in the commit
+git commit -m "Initial exploration" # commit the changes with a message
+git push # publish the changes
+```
+
+Now you'll be able to [create a Pull Request in GitHub](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
+
+## Changing the `Makefile`
+
+There's no magic in the `Makefile`. We often add project-specific commands or update the existing ones over the course of a project. For example, we've added scripts to generate reports with pandoc, build and serve documentation, publish static sites from assets, package code for distribution, and more.
+
+## Installing Make on Windows
+
+Unfortunately, GNU Make is not typically pre-installed on Windows. Here are a few different options for getting Make:
+
+- **Use a package manager.** You will need to install the package manager first if you don't already have it.
+ - [chocolatey](https://community.chocolatey.org/) ([entry for Make](https://community.chocolatey.org/packages/make))
+ ```bash
+ choco install make
+ ```
+ - [winget](https://winget.run/) ([entry for Make](https://winget.run/pkg/GnuWin32/Make))
+ ```bash
+ winget install -e --id GnuWin32.Make
+ ```
+ - [scoop](https://scoop.sh/) ([entry for Make](https://scoop.sh/#/apps?q=make&id=c43ff861c0f1713336e5304d85334a29ffb86317))
+ ```bash
+ scoop install main/make
+ ```
+- **Windows Subsystem for Linux**. WSL is a full, non-virtualized Linux environment inside Windows. You can use it to run all of your data science workflows on Ubuntu, and it will have Make included. See instructions for installing WSL [here](https://learn.microsoft.com/en-us/windows/wsl/install).
+- **Cygwin**. A Unix-like development environment that includes Make. See instructions about installing Cygwin [here](https://www.cygwin.com/install.html).
+- **MinGW**. A GNU development environment that runs on Windows and includes Make. See information about installing MinGW [here](https://www.mingw-w64.org/downloads/).
diff --git a/docs/docs/v1.md b/docs/docs/v1.md
new file mode 100644
index 000000000..749994698
--- /dev/null
+++ b/docs/docs/v1.md
@@ -0,0 +1,69 @@
+# Template v1
+
+While v1 has been deprecated and we recommend using [v2](index.md) moving forward, you can still use the v1 template should you so choose. Below you'll find the requirements and default folder structure for the v1 template.
+
+## Requirements
+
+ - Python 3.8+
+ - [cookiecutter Python package](http://cookiecutter.readthedocs.org/en/latest/installation.html) >= 1.4.0: `pip install cookiecutter`
+
+## Starting a new project
+
+Starting a new project is as easy as running this command at the command line. No need to create a directory first; the cookiecutter will do it for you.
+
+```nohighlight
+cookiecutter https://github.com/drivendata/cookiecutter-data-science
+```
+
+## Example
+
+
+
+## Directory structure
+
+```nohighlight
+├── LICENSE
+├── Makefile <- Makefile with commands like `make data` or `make train`
+├── README.md <- The top-level README for developers using this project.
+├── data
+│ ├── external <- Data from third party sources.
+│ ├── interim <- Intermediate data that has been transformed.
+│ ├── processed <- The final, canonical data sets for modeling.
+│ └── raw <- The original, immutable data dump.
+│
+├── docs <- A default Sphinx project; see sphinx-doc.org for details
+│
+├── models <- Trained and serialized models, model predictions, or model summaries
+│
+├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
+│ the creator's initials, and a short `-` delimited description, e.g.
+│ `1.0-jqp-initial-data-exploration`.
+│
+├── references <- Data dictionaries, manuals, and all other explanatory materials.
+│
+├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
+│ └── figures <- Generated graphics and figures to be used in reporting
+│
+├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
+│ generated with `pip freeze > requirements.txt`
+│
+├── setup.py <- Make this project pip installable with `pip install -e`
+├── src <- Source code for use in this project.
+│ ├── __init__.py <- Makes src a Python module
+│ │
+│ ├── data <- Scripts to download or generate data
+│ │ └── make_dataset.py
+│ │
+│ ├── features <- Scripts to turn raw data into features for modeling
+│ │ └── build_features.py
+│ │
+│ ├── models <- Scripts to train models and then use trained models to make
+│ │ │ predictions
+│ │ ├── predict_model.py
+│ │ └── train_model.py
+│ │
+│ └── visualization <- Scripts to create exploratory and results oriented visualizations
+│ └── visualize.py
+│
+└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
+```
diff --git a/docs/docs/why.md b/docs/docs/why.md
new file mode 100644
index 000000000..af82dffec
--- /dev/null
+++ b/docs/docs/why.md
@@ -0,0 +1,49 @@
+# Why use this project structure?
+
+> We're not talking about bikeshedding the indentation aesthetics or pedantic formatting standards — ultimately, data science code quality is about correctness and reproducibility.
+
+When we think about data analysis, we often think just about the resulting reports, insights, or visualizations. While these end products are generally the main event, it's easy to focus on making the products _look nice_ and ignore the _quality of the code that generates them_. Because these end products are created programmatically, **code quality is still important**! And we're not talking about bikeshedding the indentation aesthetics or pedantic formatting standards — ultimately, **data science code quality is about correctness and reproducibility**.
+
+It's no secret that good analyses are often the result of very scattershot and serendipitous explorations. Tentative experiments and rapidly testing approaches that might not work out are all part of the process for getting to the good stuff, and there is no magic bullet to turn data exploration into a simple, linear progression.
+
+That being said, once started it is not a process that lends itself to thinking carefully about the structure of your code or project layout, so it's best to start with a clean, logical structure and stick to it throughout. We think it's a pretty big win all around to use a fairly standardized setup like this one. Here's why:
+
+
+## Other people will thank you
+
+> Nobody sits around before creating a new Rails project to figure out where they want to put their views; they just run `rails new` to get a standard project skeleton like everybody else.
+
+A well-defined, standard project structure means that a newcomer can begin to understand an analysis without digging into extensive documentation. It also means that they don't necessarily have to read 100% of the code before knowing where to look for very specific things.
+
+Well organized code tends to be self-documenting in that the organization itself provides context for your code without much overhead. People will thank you for this because they can:
+
+ - Collaborate more easily with you on this analysis
+ - Learn from your analysis about the process and the domain
+ - Feel confident in the conclusions at which the analysis arrives
+
+A good example of this can be found in any of the major web development frameworks like Django or Ruby on Rails. Nobody sits around before creating a new Rails project to figure out where they want to put their views; they just run `rails new` to get a standard project skeleton like everybody else. Because that default project structure is _logical_ and _reasonably standard across most projects_, it is much easier for somebody who has never seen a particular project to figure out where they would find the various moving parts.
+
+Another great example is the [Filesystem Hierarchy Standard](https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard) for Unix-like systems. The `/etc` directory has a very specific purpose, as does the `/tmp` folder, and everybody (more or less) agrees to honor that social contract. That means a Red Hat user and an Ubuntu user both know roughly where to look for certain types of files, even when using each other's system — or any other standards-compliant system for that matter!
+
+Ideally, that's how it should be when a colleague opens up your data science project.
+
+## You will thank you
+
+Ever tried to reproduce an analysis that you did a few months ago or even a few years ago? You may have written the code, but it's now impossible to decipher whether you should use `make_figures.py.old`, `make_figures_working.py` or `new_make_figures01.py` to get things done. Here are some questions we've learned to ask with a sense of existential dread:
+
+* Are we supposed to go in and join the column X to the data before we get started or did that come from one of the notebooks?
+* Come to think of it, which notebook do we have to run first before running the plotting code: was it "process data" or "clean data"?
+* Where did the shapefiles get downloaded from for the geographic plots?
+* _Et cetera, times infinity._
+
+These types of questions are painful and are symptoms of a disorganized project. A good project structure encourages practices that make it easier to come back to old work, for example separation of concerns, abstracting analysis as a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph), and engineering best practices like version control.
+
+## Nothing here is binding
+
+> "A foolish consistency is the hobgoblin of little minds" — Ralph Waldo Emerson (and [PEP 8!](https://www.python.org/dev/peps/pep-0008/#a-foolish-consistency-is-the-hobgoblin-of-little-minds))
+
+Disagree with a couple of the default folder names? Working on a project that's a little nonstandard and doesn't exactly fit with the current structure? Prefer to use a different package than one of the (few) defaults?
+
+**Go for it!** This is a lightweight structure, and is intended to be a good _starting point_ for many projects. Or, as PEP 8 put it:
+
+> Consistency within a project is more important. Consistency within one module or function is the most important. ... However, know when to be inconsistent -- sometimes style guide recommendations just aren't applicable. When in doubt, use your best judgment. Look at other examples and decide what looks best. And don't hesitate to ask!
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 30ca202df..a00ab2a7c 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -1,12 +1,60 @@
site_name: Cookiecutter Data Science
site_description: A project template and directory structure for Python data science projects.
-site_favicon: favicon.ico
+site_url: https://cookiecutter-data-science.drivendata.org
repo_url: https://github.com/drivendata/cookiecutter-data-science
edit_uri: edit/master/docs/docs
copyright: Project maintained by the friendly folks at DrivenData.
-google_analytics: ['UA-54096005-4', 'auto']
-theme: cinder
-extra_css:
- - css/extra.css
+theme:
+ favicon: favicon.ico
+ features:
+ - navigation.instant
+ - toc.integrate
+ logo: ccds.png
+ name: material
+ custom_dir: overrides
+ palette:
+ primary: custom
+ accent: custom
+ font:
+ text: Work Sans
+ code: Space Mono
nav:
- Home: index.md
+ - Why ccds?: why.md
+ - Opinions: opinions.md
+ - Using the template: using-the-template.md
+ - All options: all-options.md
+ - Contributing: contributing.md
+ - Related projects: related.md
+ - v1 Template: v1.md
+extra:
+ analytics:
+ provider: google
+ property: G-DX14MC19WY
+extra_css:
+ - css/extra.css
+extra_javascript:
+ - js/extra.js
+markdown_extensions:
+ - admonition
+ - pymdownx.details
+ - pymdownx.superfences
+ - pymdownx.tabbed:
+ alternate_style: true
+ slugify: !!python/object/apply:pymdownx.slugs.slugify
+ kwds:
+ case: lower
+ - tables
+ - toc:
+ toc_depth: 2
+plugins:
+ - include-markdown
+ - termynal:
+ title: bash
+ buttons: macos
+ prompt_literal_start:
+ - "$"
+ - gen-files:
+ scripts:
+ - scripts/generate-termynal.py
+ - scripts/configuration-table.py
diff --git a/docs/overrides/main.html b/docs/overrides/main.html
new file mode 100644
index 000000000..ea783fb88
--- /dev/null
+++ b/docs/overrides/main.html
@@ -0,0 +1,7 @@
+{% extends "base.html" %}
+
+{% block extrahead %}
+
+
+
+{% endblock %}
diff --git a/docs/overrides/partials/nav.html b/docs/overrides/partials/nav.html
new file mode 100644
index 000000000..01b8e5599
--- /dev/null
+++ b/docs/overrides/partials/nav.html
@@ -0,0 +1,42 @@
+{#-
+ Vendored from:
+ https://github.com/squidfunk/mkdocs-material/blob/bd708f16bc477f98aad68ad203826605e0298b8a/material/templates/partials/nav.html
+-#}
+{% import "partials/nav-item.html" as item with context %}
+{% set class = "md-nav md-nav--primary" %}
+{% if "navigation.tabs" in features %}
+ {% set class = class ~ " md-nav--lifted" %}
+{% endif %}
+{% if "toc.integrate" in features %}
+ {% set class = class ~ " md-nav--integrated" %}
+{% endif %}
+
+
+
+{#-
+ Added DD footer
+-#}
+
+
+
+
+ Cookiecutter Data Science is a DrivenData project.
+
\ No newline at end of file
diff --git a/docs/scripts/configuration-table.py b/docs/scripts/configuration-table.py
new file mode 100644
index 000000000..4b6f0eff5
--- /dev/null
+++ b/docs/scripts/configuration-table.py
@@ -0,0 +1,180 @@
+import json
+import re
+from pathlib import Path
+
+from ccds.hook_utils.dependencies import basic
+
+PROJECT_ROOT = Path(__file__).parents[2]
+
+
+def _table_row(items, delimiter="|"):
+ return f"| {' | '.join(items)} |"
+
+
+def _table_header():
+ return [
+ _table_row(
+ (
+ "Choice",
+ "Sub-field",
+ "Description",
+ "More information",
+ )
+ ),
+ _table_row(["---"] * 4),
+ ]
+
+
+def _new_section(item, item_type, default, description, more_info=""):
+ return [
+ f"## {item.replace('_', ' ').title()}",
+ "",
+ f"**Type:** {item_type}",
+ "",
+ f"**Default value:** `{default}`",
+ "",
+ description,
+ "",
+ f"_Find more information here:_ {more_info}" if more_info else "",
+ "",
+ ] + (_table_header() if item_type != "string" else [])
+
+
+def _ccds_help_to_lookups(help, prefix="", out=None):
+ if out is None:
+ out = {}
+
+ for item in help:
+ # add this item to the help
+ item_name = item.get("field", item.get("choice"))
+ item_key = item_name if not prefix else f"{prefix}.{item_name}"
+ out[item_key] = item["help"]
+
+ if choices := item.get("choices", None):
+ out.update(_ccds_help_to_lookups(choices, prefix=item_key, out=out))
+
+ if subfields := item.get("subfields", None):
+ out.update(_ccds_help_to_lookups(subfields, prefix=item_key, out=out))
+
+ return out
+
+
+def build_help_table_rows(data, help_lookup, lookup_prefix=""):
+ body_items = []
+ for top_key, top_value in data.items():
+ # top value is string, so it is just user entry
+ if isinstance(top_value, str):
+ item_help = help_lookup[f"{lookup_prefix}{top_key}"]
+
+ # simplify template render string
+ if m := re.search(r"{{ cookiecutter\.(.*) }}", top_value):
+ top_value = f"`{m.group(1)}`"
+
+ section = _new_section(
+ top_key,
+ "string",
+ top_value,
+ item_help["description"],
+ item_help["more_information"],
+ )
+ elif isinstance(top_value, list):
+ choices_help = help_lookup[f"{lookup_prefix}{top_key}"]
+
+ default = (
+ list(top_value[0].keys())[0]
+ if isinstance(top_value[0], dict)
+ else top_value[0]
+ )
+
+ section = _new_section(
+ top_key,
+ "choice",
+ default,
+ choices_help["description"],
+ choices_help["more_information"],
+ )
+ for ix, choice in enumerate(top_value):
+ if isinstance(choice, str):
+ item_help = help_lookup[f"{lookup_prefix}{top_key}.{choice}"]
+ more_info = (
+ item_help["more_information"]
+ if choice != "basic"
+ else item_help["more_information"] + (", ".join(basic))
+ )
+
+ section.append(
+ _table_row(
+ (
+ choice,
+ "",
+ item_help["description"],
+ more_info,
+ )
+ )
+ )
+ elif isinstance(choice, dict):
+ choice_key = list(choice.keys())[0]
+ item_help = help_lookup[f"{lookup_prefix}{top_key}.{choice_key}"]
+ section.append(
+ _table_row(
+ (
+ choice_key,
+ "",
+ item_help["description"],
+ item_help["more_information"],
+ )
+ )
+ )
+
+ # subfields
+ if isinstance(choice[choice_key], dict):
+ for subfield_key, subfield_value in choice[choice_key].items():
+ subfield_help = help_lookup[
+ f"{lookup_prefix}{top_key}.{choice_key}.{subfield_key}"
+ ]
+ section.append(
+ _table_row(
+ (
+ choice_key,
+ subfield_key,
+ subfield_help["description"],
+ subfield_help["more_information"],
+ )
+ )
+ )
+
+ body_items += section + [""]
+ return body_items
+
+
+def render_options_table():
+ with (PROJECT_ROOT / "ccds.json").open() as f:
+ data = json.load(f)
+
+ with (PROJECT_ROOT / "ccds-help.json").open() as f:
+ help = json.load(f)
+ help_lookup = _ccds_help_to_lookups(help)
+
+ body_items = build_help_table_rows(data, help_lookup)
+
+ output = "\n".join(body_items)
+ return output
+
+
+# script entry point for debugging
+if __name__ == "__main__":
+ print(render_options_table())
+
+# mkdocs build entry point
+else:
+ import mkdocs_gen_files
+
+ with mkdocs_gen_files.open("all-options.md", "r") as f:
+ options_file = f.read()
+
+ options_file = options_file.replace(
+ "", render_options_table()
+ )
+
+ with mkdocs_gen_files.open("all-options.md", "w") as f:
+ f.write(options_file)
diff --git a/docs/scripts/generate-termynal.py b/docs/scripts/generate-termynal.py
new file mode 100644
index 000000000..296257cfa
--- /dev/null
+++ b/docs/scripts/generate-termynal.py
@@ -0,0 +1,152 @@
+import shutil
+from pathlib import Path
+
+import pexpect
+from ansi2html import Ansi2HTMLConverter
+
+CCDS_ROOT = Path(__file__).parents[2].resolve()
+
+
+def execute_command_and_get_output(command, input_script):
+ input_script = iter(input_script)
+ child = pexpect.spawn(command, encoding="utf-8")
+
+ interaction_history = [f"$ {command}\n"]
+
+ prompt, user_input = next(input_script)
+
+ try:
+ while True:
+ index = child.expect([prompt, pexpect.EOF, pexpect.TIMEOUT])
+
+ if index == 0:
+ output = child.before + child.after
+ interaction_history += [line.strip() for line in output.splitlines()]
+
+ child.sendline(user_input)
+
+ try:
+ prompt, user_input = next(input_script)
+ except StopIteration:
+ pass
+
+ elif index == 1: # The subprocess has exited.
+ output = child.before
+ interaction_history += [line.strip() for line in output.splitlines()]
+ break
+ elif index == 2: # Timeout waiting for new data.
+ print("\nTimeout waiting for subprocess response.")
+ continue
+
+ finally:
+ return interaction_history
+
+
+ccds_script = [
+ ("project_name", "My Analysis"),
+ ("repo_name", "my_analysis"),
+ ("module_name", ""),
+ ("author_name", "Dat A. Scientist"),
+ ("description", "This is my analysis of the data."),
+ ("python_version_number", "3.12"),
+ ("Choose from", "3"),
+ ("bucket", "s3://my-aws-bucket"),
+ ("aws_profile", ""),
+ ("Choose from", "2"),
+ ("Choose from", "1"),
+ ("Choose from", "2"),
+ ("Choose from", "2"),
+ ("Choose from", "1"),
+]
+
+
+def run_scripts():
+ try:
+ output = []
+ output += execute_command_and_get_output(f"ccds {CCDS_ROOT}", ccds_script)
+ return output
+
+ finally:
+ # always cleanup
+ if Path("my_analysis").exists():
+ shutil.rmtree("my_analysis")
+
+
+def render_termynal():
+ # actually execute the scripts and capture the output
+ results = run_scripts()
+
+ # watch for inputs and format them differently
+ script = iter(ccds_script)
+ _, user_input = next(script)
+
+ conv = Ansi2HTMLConverter(inline=True)
+    html_lines = [
+        ''
+    ]
+ result_collector = []
+
+ for line_ix, result in enumerate(results):
+ # style bash user inputs
+ if result.startswith("$"):
+ result = conv.convert(result.strip("$"), full=False)
+ html_lines.append(
+ f'{result}'
+ )
+
+ # style inline cookiecutter user inputs
+ elif ":" in result and user_input in result:
+ # treat all the options that were output as a single block
+ if len(result_collector) > 1:
+ prev_results = conv.convert(
+ "\n".join(result_collector[:-1]), full=False
+ )
+ html_lines.append(f"{prev_results}")
+
+ # split the line up into the prompt text with options, the default, and the user input
+ prompt, user_input = result.strip().split(":", 1)
+ prompt = conv.convert(prompt, full=False)
+ prompt = f'{result_collector[-1].strip()} {prompt}:'
+ user_input = conv.convert(user_input.strip(), full=False)
+
+ # treat the cookiecutter prompt as a shell prompt
+ out_line = f"{prompt}"
+ out_line += f'{user_input}'
+ html_lines.append(out_line)
+ html_lines.append('')
+ result_collector = []
+
+ try:
+ _, user_input = next(script)
+ except StopIteration:
+ user_input = "STOP ITER" # never true so we just capture the remaining rows after the script
+
+ # collect all the other lines for a single output
+ else:
+ result_collector.append(result)
+
+ html_lines.append("
")
+ output = "\n".join(html_lines)
+
+ # replace local directory in ccds call with URL so it can be used for documentation
+ output = output.replace(
+ str(CCDS_ROOT), "https://github.com/drivendata/cookiecutter-data-science"
+ )
+ return output
+
+
+# script entry point for debugging
+if __name__ == "__main__":
+ print(render_termynal())
+
+# mkdocs build entry point
+else:
+ import mkdocs_gen_files
+
+ with mkdocs_gen_files.open("index.md", "r") as f:
+ index = f.read()
+
+ index = index.replace("", render_termynal())
+
+ with mkdocs_gen_files.open("index.md", "w") as f:
+ f.write(index)
diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
new file mode 100644
index 000000000..e7ac204d7
--- /dev/null
+++ b/hooks/post_gen_project.py
@@ -0,0 +1,79 @@
+import shutil
+from copy import copy
+from pathlib import Path
+
+# https://github.com/cookiecutter/cookiecutter/issues/824
+# our workaround is to include these utility functions in the CCDS package
+from ccds.hook_utils.custom_config import write_custom_config
+from ccds.hook_utils.dependencies import basic, packages, scaffold, write_dependencies
+
+#
+# TEMPLATIZED VARIABLES FILLED IN BY COOKIECUTTER
+#
+packages_to_install = copy(packages)
+
+# {% if cookiecutter.dataset_storage.s3 %}
+packages_to_install += ["awscli"]
+# {% endif %} #
+
+# {% if cookiecutter.include_code_scaffold == "Yes" %}
+packages_to_install += scaffold
+# {% endif %}
+
+# {% if cookiecutter.pydata_packages == "basic" %}
+packages_to_install += basic
+# {% endif %}
+
+# track packages that are not available through conda
+pip_only_packages = [
+ "awscli",
+ "python-dotenv",
+]
+
+# Use the selected documentation package specified in the config,
+# or none if none selected
+docs_path = Path("docs")
+# {% if cookiecutter.docs != "none" %}
+packages_to_install += ["{{ cookiecutter.docs }}"]
+pip_only_packages += ["{{ cookiecutter.docs }}"]
+docs_subpath = docs_path / "{{ cookiecutter.docs }}"
+for obj in docs_subpath.iterdir():
+ shutil.move(str(obj), str(docs_path))
+# {% endif %}
+
+# Remove all remaining docs templates
+for docs_template in docs_path.iterdir():
+ if docs_template.is_dir() and not docs_template.name == "docs":
+ shutil.rmtree(docs_template)
+
+#
+# POST-GENERATION FUNCTIONS
+#
+write_dependencies(
+ "{{ cookiecutter.dependency_file }}",
+ packages_to_install,
+ pip_only_packages,
+ repo_name="{{ cookiecutter.repo_name }}",
+ module_name="{{ cookiecutter.module_name }}",
+ python_version="{{ cookiecutter.python_version_number }}",
+)
+
+write_custom_config("{{ cookiecutter.custom_config }}")
+
+# Remove LICENSE if "No license file"
+if "{{ cookiecutter.open_source_license }}" == "No license file":
+ Path("LICENSE").unlink()
+
+# Make single quotes prettier
+# Jinja tojson escapes single-quotes with \u0027 since it's meant for HTML/JS
+pyproject_text = Path("pyproject.toml").read_text()
+Path("pyproject.toml").write_text(pyproject_text.replace(r"\u0027", "'"))
+
+# {% if cookiecutter.include_code_scaffold == "No" %}
+# remove everything except __init__.py so result is an empty package
+for generated_path in Path("{{ cookiecutter.module_name }}").iterdir():
+ if generated_path.is_dir():
+ shutil.rmtree(generated_path)
+ elif generated_path.name != "__init__.py":
+ generated_path.unlink()
+# {% endif %}
diff --git a/hooks/pre_gen_project.py b/hooks/pre_gen_project.py
index 2bd49c083..21f58047d 100644
--- a/hooks/pre_gen_project.py
+++ b/hooks/pre_gen_project.py
@@ -1,22 +1,4 @@
-def deprecation_warning():
- print("""
+# Functions here run before the project is generated.
-=============================================================================
-*** DEPRECATION WARNING ***
-
-Cookiecutter data science is moving to v2 soon, which will entail using
-the command `ccds ...` rather than `cookiecutter ...`. The cookiecutter command
-will continue to work, and this version of the template will still be available.
-To use the legacy template, you will need to explicitly use `-c v1` to select it.
-
-Please update any scripts/automation you have to append the `-c v1` option,
-which is available now.
-
-For example:
- cookiecutter -c v1 https://github.com/drivendata/cookiecutter-data-science
-=============================================================================
-
- """)
-
-
-deprecation_warning()
+# For more on how these hooks are used, see
+# https://cookiecutter.readthedocs.io/en/1.7.2/advanced/hooks.html
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..44b5cc46f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,44 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[tool.flit.module]
+name = "ccds"
+
+[project]
+name = "cookiecutter-data-science"
+version = "2.0.0"
+description = "A logical, reasonably standardized but flexible project structure for doing and sharing data science work."
+authors = [
+ { name = "DrivenData", email = "info@drivendata.org" },
+]
+license = { file = "LICENSE" }
+readme = "README.md"
+classifiers = [
+ "Development Status :: 5 - Stable",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Scientific/Engineering",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+requires-python = ">=3.8"
+dependencies = [
+ "click",
+ "cookiecutter",
+]
+
+[project.scripts]
+ccds = "ccds.__main__:main"
+
+[project.urls]
+"Homepage" = "https://drivendata.github.io/cookiecutter-data-science/"
+"Source Code" = "https://github.com/drivendata/cookiecutter-data-science/"
+"Bug Tracker" = "https://github.com/drivendata/cookiecutter-data-science/issues"
+"DrivenData" = "https://drivendata.co"
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 07f48f4de..000000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-mkdocs
-mkdocs-cinder
-cookiecutter
-pytest
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..0eb7d8386
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+[flake8]
+ignore = E203, E402, E501, W503
+max-line-length = 99
\ No newline at end of file
diff --git a/tests/conda_harness.sh b/tests/conda_harness.sh
new file mode 100644
index 000000000..b43c1ee77
--- /dev/null
+++ b/tests/conda_harness.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+set -ex
+
+# enable conda commands inside the script
+eval "$(conda shell.bash hook)"
+
+PROJECT_NAME=$(basename $1)
+CCDS_ROOT=$(dirname $0)
+
+# configure exit / teardown behavior
+function finish {
+ if [[ $(which python) == *"$PROJECT_NAME"* ]]; then
+ conda deactivate
+ fi
+
+ conda env remove -n $PROJECT_NAME -y
+}
+trap finish EXIT
+
+# source the steps in the test
+source $CCDS_ROOT/test_functions.sh
+
+# navigate to the generated project and run make commands
+cd $1
+
+# Fix for conda issue https://github.com/conda/conda/issues/7267 on MacOS
+if [ -e /usr/local/miniconda ]
+then
+ sudo chown -R $USER /usr/local/miniconda
+fi
+
+make
+make create_environment
+conda activate $PROJECT_NAME
+make requirements
+
+run_tests $PROJECT_NAME
diff --git a/tests/conftest.py b/tests/conftest.py
index 8acbfb271..7e9913874 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,47 +1,130 @@
-import sys
-import pytest
+import json
import shutil
+import sys
+import tempfile
+from contextlib import contextmanager
+from itertools import cycle, product
from pathlib import Path
-from cookiecutter import main
+
+import pytest
+
+from ccds.__main__ import api_main
CCDS_ROOT = Path(__file__).parents[1].resolve()
-args = {
- 'project_name': 'DrivenData',
- 'author_name': 'DrivenData',
- 'open_source_license': 'BSD-3-Clause',
- 'python_interpreter': 'python'
- }
+default_args = {
+ "project_name": "my_test_project",
+ "repo_name": "my-test-repo",
+ "module_name": "project_module",
+ "author_name": "DrivenData",
+ "description": "A test project",
+}
+
+
+def config_generator(fast=False):
+ cookiecutter_json = json.load((CCDS_ROOT / "ccds.json").open("r"))
+
+ # python versions for the created environment; match the root
+ # python version since Pipenv needs to be able to find an executable
+ running_py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+ py_version = [("python_version_number", v) for v in [running_py_version]]
+
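+ # build the cartesian product of the options that must be tested in combination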
+ configs = product(
+ py_version,
+ [
+ ("environment_manager", opt)
+ for opt in cookiecutter_json["environment_manager"]
+ ],
+ [("dependency_file", opt) for opt in cookiecutter_json["dependency_file"]],
+ [("pydata_packages", opt) for opt in cookiecutter_json["pydata_packages"]],
+ )
+
+ def _is_valid(config):
+ config = dict(config)
+ # pipenv and Pipfile are only valid together (both selected or neither)
+ if (config["environment_manager"] == "pipenv") ^ (
+ config["dependency_file"] == "Pipfile"
+ ):
+ return False
+ # conda is the only valid env manager for environment.yml
+ if (config["dependency_file"] == "environment.yml") and (
+ config["environment_manager"] != "conda"
+ ):
+ return False
+ return True
+
+ # remove invalid configs
+ configs = [c for c in configs if _is_valid(c)]
-def system_check(basename):
- platform = sys.platform
- if 'linux' in platform:
- basename = basename.lower()
- return basename
+ # cycle through the values of the other multi-select fields, which should be
+ # interoperable and so don't need to be covered combinatorially
+ cycle_fields = [
+ "dataset_storage",
+ "open_source_license",
+ "include_code_scaffold",
+ "docs",
+ ]
+ cyclers = {k: cycle(cookiecutter_json[k]) for k in cycle_fields}
+ for ind, c in enumerate(configs):
+ config = dict(c)
+ config.update(default_args)
+ # rotate each cycled field to its next value for this config
+ for field, cycler in cyclers.items():
+ config[field] = next(cycler)
+ config["repo_name"] += f"-{ind}"
+ yield config
-@pytest.fixture(scope='class', params=[{}, args])
-def default_baked_project(tmpdir_factory, request):
- temp = tmpdir_factory.mktemp('data-project')
- out_dir = Path(temp).resolve()
+ # only generate a single config if --fast was passed once, or three or more times
+ if fast == 1 or fast >= 3:
+ break
- pytest.param = request.param
- main.cookiecutter(
+
+def pytest_addoption(parser):
+ """Pass -F/--fast multiple times to speed up tests
+
+ default - execute makefile commands, all configs
+
+ -F - execute makefile commands, single config
+ -FF - skip makefile commands, all configs
+ -FFF - skip makefile commands, single config
+ """
+ parser.addoption(
+ "--fast",
+ "-F",
+ action="count",
+ default=0,
+ help="Speed up tests by skipping configs and/or Makefile validation",
+ )
+
+
+@pytest.fixture
+def fast(request):
+ return request.config.getoption("--fast")
+
+
+def pytest_generate_tests(metafunc):
+ # setup config fixture to get all of the results from config_generator
+ if "config" in metafunc.fixturenames:
+ metafunc.parametrize(
+ "config", config_generator(metafunc.config.getoption("fast"))
+ )
+
+
+@contextmanager
+def bake_project(config):
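+ """Render the template with the given config into a temporary directory,
+ yield the generated project path, and clean up afterwards."""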
+ temp = Path(tempfile.mkdtemp(suffix="data-project")).resolve()
+
+ api_main.cookiecutter(
str(CCDS_ROOT),
no_input=True,
- extra_context=pytest.param,
- output_dir=out_dir
+ extra_context=config,
+ output_dir=temp,
+ overwrite_if_exists=True,
)
- pn = pytest.param.get('project_name') or 'project_name'
-
- # project name gets converted to lower case on Linux but not Mac
- pn = system_check(pn)
-
- proj = out_dir / pn
- request.cls.path = proj
- yield
+ yield temp / config["repo_name"]
# cleanup after
- shutil.rmtree(out_dir)
\ No newline at end of file
+ shutil.rmtree(temp)
diff --git a/tests/pipenv_harness.sh b/tests/pipenv_harness.sh
new file mode 100644
index 000000000..463756e5b
--- /dev/null
+++ b/tests/pipenv_harness.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+set -ex
+
+PROJECT_NAME=$(basename $1)
+CCDS_ROOT=$(dirname $0)
+
+# configure exit / teardown behavior
+function finish {
+ if [[ $(which python) == *"$PROJECT_NAME"* ]]; then
+ exit
+ fi
+
+ pipenv --rm
+}
+trap finish EXIT
+
+# source the steps in the test
+source $CCDS_ROOT/test_functions.sh
+
+# navigate to the generated project and run make commands
+cd $1
+make
+make create_environment
+
+# can happen outside of environment since pipenv knows based on Pipfile
+make requirements
+
+# test with pipenv run
+pipenv run python -c "import sys; assert \"$PROJECT_NAME\" in sys.executable"
diff --git a/tests/test_creation.py b/tests/test_creation.py
index b32d23d66..322c00ed3 100644
--- a/tests/test_creation.py
+++ b/tests/test_creation.py
@@ -1,113 +1,208 @@
+import json
import os
-import pytest
-from subprocess import check_output
-from conftest import system_check
+import sys
+from pathlib import Path
+from subprocess import PIPE, run
+
+from conftest import bake_project
+
+BASH_EXECUTABLE = os.getenv("BASH_EXECUTABLE", "bash")
def no_curlies(filepath):
- """ Utility to make sure no curly braces appear in a file.
- That is, was Jinja able to render everything?
+ """Utility to make sure no curly braces appear in a file.
+ That is, was Jinja able to render everything?
"""
- with open(filepath, 'r') as f:
- data = f.read()
-
- template_strings = [
- '{{',
- '}}',
- '{%',
- '%}'
- ]
+ data = filepath.open("r").read()
+
+ template_strings = ["{{", "}}", "{%", "%}"]
template_strings_in_file = [s in data for s in template_strings]
return not any(template_strings_in_file)
-@pytest.mark.usefixtures("default_baked_project")
-class TestCookieSetup(object):
- def test_project_name(self):
- project = self.path
- if pytest.param.get('project_name'):
- name = system_check('DrivenData')
- assert project.name == name
- else:
- assert project.name == 'project_name'
-
- def test_author(self):
- setup_ = self.path / 'setup.py'
- args = ['python', str(setup_), '--author']
- p = check_output(args).decode('ascii').strip()
- if pytest.param.get('author_name'):
- assert p == 'DrivenData'
- else:
- assert p == 'Your name (or your organization/company/team)'
-
- def test_readme(self):
- readme_path = self.path / 'README.md'
- assert readme_path.exists()
- assert no_curlies(readme_path)
- if pytest.param.get('project_name'):
- with open(readme_path) as fin:
- assert 'DrivenData' == next(fin).strip()
-
- def test_setup(self):
- setup_ = self.path / 'setup.py'
- args = ['python', str(setup_), '--version']
- p = check_output(args).decode('ascii').strip()
- assert p == '0.1.0'
-
- def test_license(self):
- license_path = self.path / 'LICENSE'
- assert license_path.exists()
- assert no_curlies(license_path)
-
- def test_license_type(self):
- setup_ = self.path / 'setup.py'
- args = ['python', str(setup_), '--license']
- p = check_output(args).decode('ascii').strip()
- if pytest.param.get('open_source_license'):
- assert p == 'BSD-3'
- else:
- assert p == 'MIT'
-
- def test_requirements(self):
- reqs_path = self.path / 'requirements.txt'
- assert reqs_path.exists()
- assert no_curlies(reqs_path)
- if pytest.param.get('python_interpreter'):
- with open(reqs_path) as fin:
- lines = list(map(lambda x: x.strip(), fin.readlines()))
- assert 'pathlib2' in lines
-
- def test_makefile(self):
- makefile_path = self.path / 'Makefile'
- assert makefile_path.exists()
- assert no_curlies(makefile_path)
-
- def test_folders(self):
- expected_dirs = [
- 'data',
- 'data/external',
- 'data/interim',
- 'data/processed',
- 'data/raw',
- 'docs',
- 'models',
- 'notebooks',
- 'references',
- 'reports',
- 'reports/figures',
- 'src',
- 'src/data',
- 'src/features',
- 'src/models',
- 'src/visualization',
+def test_baking_configs(config, fast):
+ """For every generated config in the config_generator, run all
+ of the tests.
+ """
+ print("using config", json.dumps(config, indent=2))
+ with bake_project(config) as project_directory:
+ verify_folders(project_directory, config)
+ verify_files(project_directory, config)
+ lint(project_directory)
+
+ if fast < 2:
+ verify_makefile_commands(project_directory, config)
+
+
+def verify_folders(root, config):
+ """Tests that expected folders and only expected folders exist."""
+ expected_dirs = [
+ ".",
+ "data",
+ "data/external",
+ "data/interim",
+ "data/processed",
+ "data/raw",
+ "docs",
+ "models",
+ "notebooks",
+ "references",
+ "reports",
+ "reports/figures",
+ config["module_name"],
+ ]
+
+ if config["include_code_scaffold"] == "Yes":
+ expected_dirs += [
+ f"{config['module_name']}/modeling",
+ ]
+
+ if config["docs"] == "mkdocs":
+ expected_dirs += ["docs/docs"]
+
+ expected_dirs = [
+ # (root / d).resolve().relative_to(root) for d in expected_dirs
+ Path(d)
+ for d in expected_dirs
+ ]
+
+ existing_dirs = [
+ d.resolve().relative_to(root) for d in root.glob("**") if d.is_dir()
+ ]
+
+ assert sorted(existing_dirs) == sorted(expected_dirs)
+
+
+def verify_files(root, config):
+ """Test that expected files and only expected files exist."""
+ expected_files = [
+ "Makefile",
+ "README.md",
+ "pyproject.toml",
+ "setup.cfg",
+ ".env",
+ ".gitignore",
+ "data/external/.gitkeep",
+ "data/interim/.gitkeep",
+ "data/processed/.gitkeep",
+ "data/raw/.gitkeep",
+ "docs/.gitkeep",
+ "notebooks/.gitkeep",
+ "references/.gitkeep",
+ "reports/.gitkeep",
+ "reports/figures/.gitkeep",
+ "models/.gitkeep",
+ f"{config['module_name']}/__init__.py",
+ ]
+
+ # conditional files
+ if not config["open_source_license"].startswith("No license"):
+ expected_files.append("LICENSE")
+
+ if config["include_code_scaffold"] == "Yes":
+ expected_files += [
+ f"{config['module_name']}/config.py",
+ f"{config['module_name']}/dataset.py",
+ f"{config['module_name']}/features.py",
+ f"{config['module_name']}/modeling/__init__.py",
+ f"{config['module_name']}/modeling/train.py",
+ f"{config['module_name']}/modeling/predict.py",
+ f"{config['module_name']}/plots.py",
]
- ignored_dirs = [
- str(self.path)
+ if config["docs"] == "mkdocs":
+ expected_files += [
+ "docs/mkdocs.yml",
+ "docs/README.md",
+ "docs/docs/index.md",
+ "docs/docs/getting-started.md",
]
- abs_expected_dirs = [str(self.path / d) for d in expected_dirs]
- abs_dirs, _, _ = list(zip(*os.walk(self.path)))
- assert len(set(abs_expected_dirs + ignored_dirs) - set(abs_dirs)) == 0
+ expected_files.append(config["dependency_file"])
+
+ expected_files = [Path(f) for f in expected_files]
+ existing_files = [f.relative_to(root) for f in root.glob("**/*") if f.is_file()]
+
+ assert sorted(existing_files) == sorted(expected_files)
+
+ for f in existing_files:
+ assert no_curlies(root / f)
+
+
+def verify_makefile_commands(root, config):
+ """Actually shell out to bash and run the make commands for:
+ - the default target, which lists the available commands
+ - create_environment
+ - requirements
+ Ensure that these use the proper environment.
+ """
+ test_path = Path(__file__).parent
+
+ if config["environment_manager"] == "conda":
+ harness_path = test_path / "conda_harness.sh"
+ elif config["environment_manager"] == "virtualenv":
+ harness_path = test_path / "virtualenv_harness.sh"
+ elif config["environment_manager"] == "pipenv":
+ harness_path = test_path / "pipenv_harness.sh"
+ elif config["environment_manager"] == "none":
+ return True
+ else:
+ raise ValueError(
+ f"Environment manager '{config['environment_manager']}' not found in test harnesses."
+ )
+
+ result = run(
+ [BASH_EXECUTABLE, str(harness_path), str(root.resolve())],
+ stderr=PIPE,
+ stdout=PIPE,
+ )
+ result_returncode = result.returncode
+
+ encoding = sys.stdout.encoding
+
+ if encoding is None:
+ encoding = "utf-8"
+
+ # normally hidden by pytest except in failure we want this displayed
+ print("PATH=", os.getenv("PATH"))
+ print("\n======================= STDOUT ======================")
+ stdout_output = result.stdout.decode(encoding)
+ print(stdout_output)
+
+ print("\n======================= STDERR ======================")
+ print(result.stderr.decode(encoding))
+
+ # Check that makefile help ran successfully
+ assert "Available rules:" in stdout_output
+ assert "clean Delete all compiled Python files" in stdout_output
+
+ assert result_returncode == 0
+
+
+def lint(root):
+ """Run the linters on the project."""
+ result = run(
+ ["make", "lint"],
+ cwd=root,
+ stderr=PIPE,
+ stdout=PIPE,
+ )
+ result_returncode = result.returncode
+
+ encoding = sys.stdout.encoding
+
+ if encoding is None:
+ encoding = "utf-8"
+
+ # normally hidden by pytest except in failure we want this displayed
+ print("PATH=", os.getenv("PATH"))
+ print("\n======================= STDOUT ======================")
+ stdout_output = result.stdout.decode(encoding)
+ print(stdout_output)
+
+ print("\n======================= STDERR ======================")
+ print(result.stderr.decode(encoding))
+
+ assert result_returncode == 0
diff --git a/tests/test_functions.sh b/tests/test_functions.sh
new file mode 100644
index 000000000..96c19f447
--- /dev/null
+++ b/tests/test_functions.sh
@@ -0,0 +1,12 @@
+function run_tests () {
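+ # smoke test: confirm python runs and that the active environment belongs to the project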
+ python --version
+ python -c "print('python runs....')"
+
+ if [[ $(which python) == *"$1"* ]]; then
+ echo "found correct python"
+ else
+ echo "Python env name $1 not in Python path $(which python)"
+ exit 99
+ fi
+
+}
\ No newline at end of file
diff --git a/tests/virtualenv_harness.sh b/tests/virtualenv_harness.sh
new file mode 100644
index 000000000..cdc1231c6
--- /dev/null
+++ b/tests/virtualenv_harness.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -e
+
+PROJECT_NAME=$(basename $1)
+CCDS_ROOT=$(dirname $0)
+
+# configure exit / teardown behavior
+function finish {
+ if [[ $(which python) == *"$PROJECT_NAME"* ]]; then
+ deactivate
+ fi
+
+ if [ ! -z `which rmvirtualenv` ]; then
+ rmvirtualenv $PROJECT_NAME
+ elif [ ! -z `which rmvirtualenv.bat` ]; then
+ rmvirtualenv.bat $PROJECT_NAME
+ fi
+}
+trap finish EXIT
+
+# source the steps in the test
+source $CCDS_ROOT/test_functions.sh
+
+# navigate to the generated project and run make commands
+cd $1
+
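+# on Windows CI runners TMPDIR may be unset; fall back to the runner's temp directory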
+if [ -z $TMPDIR ]
+then
+ windowstmpdir=/c/Users/VssAdministrator/AppData/Local/Temp
+ if [ -e $windowstmpdir ]
+ then
+ export TMPDIR=$windowstmpdir
+ fi
+fi
+
+TEMP_ENV_ROOT=$(mktemp -d "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX")
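+# point virtualenvwrapper at a disposable WORKON_HOME so test environments stay isolated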
+export WORKON_HOME=$TEMP_ENV_ROOT
+
+if [ ! -z `which virtualenvwrapper.sh` ]
+then
+ source `which virtualenvwrapper.sh`
+fi
+
+make
+make create_environment
+
+# `workon` may not be available if virtualenvwrapper was not sourced, so activate the environment directly
+
+if [ -e $TEMP_ENV_ROOT/$PROJECT_NAME/bin/activate ]
+then
+ . $TEMP_ENV_ROOT/$PROJECT_NAME/bin/activate
+else
+ . $TEMP_ENV_ROOT/$PROJECT_NAME/Scripts/activate
+fi
+
+make requirements
+
+run_tests $PROJECT_NAME
diff --git a/{{ cookiecutter.repo_name }}/.gitignore b/{{ cookiecutter.repo_name }}/.gitignore
index d7c9832ff..a5df42ba5 100644
--- a/{{ cookiecutter.repo_name }}/.gitignore
+++ b/{{ cookiecutter.repo_name }}/.gitignore
@@ -1,13 +1,25 @@
+# Data
+/data/
+
+# Mac OS-specific storage files
+.DS_Store
+
+# vim
+*.swp
+*.swo
+
+## https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
+*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
-env/
build/
develop-eggs/
dist/
@@ -19,9 +31,12 @@ lib64/
parts/
sdist/
var/
+wheels/
+share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
+MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
@@ -36,12 +51,17 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
+.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
# Translations
*.mo
@@ -49,41 +69,104 @@ coverage.xml
# Django stuff:
*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
-# Sphinx documentation
-docs/_build/
+# Scrapy stuff:
+.scrapy
+
+# MkDocs documentation
+docs/site/
# PyBuilder
+.pybuilder/
target/
-# DotEnv configuration
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
-# Database
-*.db
-*.rdb
+# Spyder project settings
+.spyderproject
+.spyproject
-# Pycharm
-.idea
+# Rope project settings
+.ropeproject
-# VS Code
-.vscode/
+# mkdocs documentation
+/site
-# Spyder
-.spyproject/
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
-# Jupyter NB Checkpoints
-.ipynb_checkpoints/
+# Pyre type checker
+.pyre/
-# exclude data from source control by default
-/data/
+# pytype static type analyzer
+.pytype/
-# Mac OS-specific storage files
-.DS_Store
+# Cython debug symbols
+cython_debug/
-# vim
-*.swp
-*.swo
-
-# Mypy cache
-.mypy_cache/
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/{{ cookiecutter.repo_name }}/Makefile b/{{ cookiecutter.repo_name }}/Makefile
index cf9406bdf..764ba81cc 100644
--- a/{{ cookiecutter.repo_name }}/Makefile
+++ b/{{ cookiecutter.repo_name }}/Makefile
@@ -1,86 +1,107 @@
-.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3
-
#################################################################################
# GLOBALS #
#################################################################################
-PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
-BUCKET = {{ cookiecutter.s3_bucket }}
-PROFILE = {{ cookiecutter.aws_profile }}
PROJECT_NAME = {{ cookiecutter.repo_name }}
-PYTHON_INTERPRETER = {{ cookiecutter.python_interpreter }}
-
-ifeq (,$(shell which conda))
-HAS_CONDA=False
-else
-HAS_CONDA=True
-endif
+PYTHON_VERSION = {{ cookiecutter.python_version_number }}
+PYTHON_INTERPRETER = python
#################################################################################
# COMMANDS #
#################################################################################
+{% if cookiecutter.dependency_file != 'none' %}
## Install Python Dependencies
-requirements: test_environment
- $(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
+.PHONY: requirements
+requirements:
+ {% if "requirements.txt" == cookiecutter.dependency_file -%}
+ $(PYTHON_INTERPRETER) -m pip install -U pip
$(PYTHON_INTERPRETER) -m pip install -r requirements.txt
+ {% elif "environment.yml" == cookiecutter.dependency_file -%}
+ conda env update --name $(PROJECT_NAME) --file environment.yml --prune
+ {% elif "Pipfile" == cookiecutter.dependency_file -%}
+ pipenv install
+ {% endif %}
+{% endif %}
-## Make Dataset
-data: requirements
- $(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
## Delete all compiled Python files
+.PHONY: clean
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete
-## Lint using flake8
+## Lint using flake8 and black (use `make format` to do formatting)
+.PHONY: lint
lint:
- flake8 src
-
-## Upload Data to S3
-sync_data_to_s3:
-ifeq (default,$(PROFILE))
- aws s3 sync data/ s3://$(BUCKET)/data/
-else
- aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
-endif
-
-## Download Data from S3
-sync_data_from_s3:
-ifeq (default,$(PROFILE))
- aws s3 sync s3://$(BUCKET)/data/ data/
-else
- aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
-endif
-
+ flake8 {{ cookiecutter.module_name }}
+ isort --check --diff --profile black {{ cookiecutter.module_name }}
+ black --check --config pyproject.toml {{ cookiecutter.module_name }}
+
+## Format source code with black
+.PHONY: format
+format:
+ black --config pyproject.toml {{ cookiecutter.module_name }}
+
+{% if not cookiecutter.dataset_storage.none %}
+## Download Data from storage system
+.PHONY: sync_data_down
+sync_data_down:
+ {% if cookiecutter.dataset_storage.s3 -%}
+ aws s3 sync s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/\
+ data/ {% if cookiecutter.dataset_storage.s3.aws_profile != 'default' %} --profile {{ cookiecutter.dataset_storage.s3.aws_profile }}{% endif %}
+ {% elif cookiecutter.dataset_storage.azure -%}
+ az storage blob download-batch -s {{ cookiecutter.dataset_storage.azure.container }}/data/ \
+ -d data/
+ {% elif cookiecutter.dataset_storage.gcs -%}
+ gsutil -m rsync -r gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/ data/
+ {% endif %}
+
+## Upload Data to storage system
+.PHONY: sync_data_up
+sync_data_up:
+ {% if cookiecutter.dataset_storage.s3 -%}
+ aws s3 sync data/\
+ s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/ {% if cookiecutter.dataset_storage.s3.aws_profile != 'default' %} --profile {{ cookiecutter.dataset_storage.s3.aws_profile }}{% endif %}
+ {% elif cookiecutter.dataset_storage.azure -%}
+ az storage blob upload-batch -d {{ cookiecutter.dataset_storage.azure.container }}/data/ \
+ -s data/
+ {% elif cookiecutter.dataset_storage.gcs -%}
+ gsutil -m rsync -r data/ gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/
+ {% endif %}
+{% endif %}
+
+{% if cookiecutter.environment_manager != 'none' %}
## Set up python interpreter environment
+.PHONY: create_environment
create_environment:
-ifeq (True,$(HAS_CONDA))
- @echo ">>> Detected conda, creating conda environment."
-ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
- conda create --name $(PROJECT_NAME) python=3
-else
- conda create --name $(PROJECT_NAME) python=2.7
-endif
- @echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
-else
- $(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
- @echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
- export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
- @bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
+ {% if cookiecutter.environment_manager == 'conda' -%}
+ {% if cookiecutter.dependency_file != 'environment.yml' %}
+ conda create --name $(PROJECT_NAME) python=$(PYTHON_VERSION) -y
+ {% else -%}
+ conda env create --name $(PROJECT_NAME) -f environment.yml
+ {% endif %}
+ @echo ">>> conda env created. Activate with:\nconda activate $(PROJECT_NAME)"
+ {% elif cookiecutter.environment_manager == 'virtualenv' -%}
+ @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
-endif
+ {% elif cookiecutter.environment_manager == 'pipenv' -%}
+ pipenv --python $(PYTHON_VERSION)
+ @echo ">>> New pipenv created. Activate with:\npipenv shell"
+ {% endif %}
+{% endif %}
-## Test python environment is setup correctly
-test_environment:
- $(PYTHON_INTERPRETER) test_environment.py
#################################################################################
# PROJECT RULES #
#################################################################################
-
+{% if cookiecutter.include_code_scaffold == 'Yes' %}
+## Make Dataset
+.PHONY: data
+data: requirements
+ $(PYTHON_INTERPRETER) {{ cookiecutter.module_name }}/dataset.py
+{% endif %}
#################################################################################
# Self Documenting Commands #
@@ -88,57 +109,14 @@ test_environment:
.DEFAULT_GOAL := help
-# Inspired by
-# sed script explained:
-# /^##/:
-# * save line in hold space
-# * purge line
-# * Loop:
-# * append newline + line to hold space
-# * go to next line
-# * if line starts with doc comment, strip comment character off and loop
-# * remove target prerequisites
-# * append hold space (+ newline) to line
-# * replace newline plus comments by `---`
-# * print line
-# Separate expressions are necessary because labels cannot be delimited by
-# semicolon; see
-.PHONY: help
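+# Read this Makefile on stdin, extract each `## description` comment and the target
+# that follows it, and print them as the help listing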
+define PRINT_HELP_PYSCRIPT
+import re, sys; \
+lines = '\n'.join([line for line in sys.stdin]); \
+matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
+print('Available rules:\n'); \
+print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
+endef
+export PRINT_HELP_PYSCRIPT
+
help:
- @echo "$$(tput bold)Available rules:$$(tput sgr0)"
- @echo
- @sed -n -e "/^## / { \
- h; \
- s/.*//; \
- :doc" \
- -e "H; \
- n; \
- s/^## //; \
- t doc" \
- -e "s/:.*//; \
- G; \
- s/\\n## /---/; \
- s/\\n/ /g; \
- p; \
- }" ${MAKEFILE_LIST} \
- | LC_ALL='C' sort --ignore-case \
- | awk -F '---' \
- -v ncol=$$(tput cols) \
- -v indent=19 \
- -v col_on="$$(tput setaf 6)" \
- -v col_off="$$(tput sgr0)" \
- '{ \
- printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
- n = split($$2, words, " "); \
- line_length = ncol - indent; \
- for (i = 1; i <= n; i++) { \
- line_length -= length(words[i]) + 1; \
- if (line_length <= 0) { \
- line_length = ncol - indent - length(words[i]) - 1; \
- printf "\n%*s ", -indent, " "; \
- } \
- printf "%s ", words[i]; \
- } \
- printf "\n"; \
- }' \
- | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
+ @python -c "$${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
diff --git a/{{ cookiecutter.repo_name }}/README.md b/{{ cookiecutter.repo_name }}/README.md
index 78eb6762d..12bfe5d69 100644
--- a/{{ cookiecutter.repo_name }}/README.md
+++ b/{{ cookiecutter.repo_name }}/README.md
@@ -1,57 +1,62 @@
-{{cookiecutter.project_name}}
-==============================
+# {{cookiecutter.project_name}}
-{{cookiecutter.description}}
+
+
+
-Project Organization
-------------
+{{cookiecutter.description}}
- ├── LICENSE
- ├── Makefile <- Makefile with commands like `make data` or `make train`
- ├── README.md <- The top-level README for developers using this project.
- ├── data
- │ ├── external <- Data from third party sources.
- │ ├── interim <- Intermediate data that has been transformed.
- │ ├── processed <- The final, canonical data sets for modeling.
- │ └── raw <- The original, immutable data dump.
- │
- ├── docs <- A default Sphinx project; see sphinx-doc.org for details
- │
- ├── models <- Trained and serialized models, model predictions, or model summaries
- │
- ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
- │ the creator's initials, and a short `-` delimited description, e.g.
- │ `1.0-jqp-initial-data-exploration`.
- │
- ├── references <- Data dictionaries, manuals, and all other explanatory materials.
- │
- ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
- │ └── figures <- Generated graphics and figures to be used in reporting
- │
- ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
- │ generated with `pip freeze > requirements.txt`
- │
- ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported
- ├── src <- Source code for use in this project.
- │ ├── __init__.py <- Makes src a Python module
- │ │
- │ ├── data <- Scripts to download or generate data
- │ │ └── make_dataset.py
- │ │
- │ ├── features <- Scripts to turn raw data into features for modeling
- │ │ └── build_features.py
- │ │
- │ ├── models <- Scripts to train models and then use trained models to make
- │ │ │ predictions
- │ │ ├── predict_model.py
- │ │ └── train_model.py
- │ │
- │ └── visualization <- Scripts to create exploratory and results oriented visualizations
- │ └── visualize.py
- │
- └── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
+## Project Organization
+```
+├── LICENSE <- Open-source license if one is chosen
+├── Makefile <- Makefile with convenience commands like `make data` or `make train`
+├── README.md <- The top-level README for developers using this project.
+├── data
+│ ├── external <- Data from third party sources.
+│ ├── interim <- Intermediate data that has been transformed.
+│ ├── processed <- The final, canonical data sets for modeling.
+│ └── raw <- The original, immutable data dump.
+│
+├── docs <- A default mkdocs project; see mkdocs.org for details
+│
+├── models <- Trained and serialized models, model predictions, or model summaries
+│
+├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
+│ the creator's initials, and a short `-` delimited description, e.g.
+│ `1.0-jqp-initial-data-exploration`.
+│
+├── pyproject.toml <- Project configuration file with package metadata for {{ cookiecutter.module_name }}
+│ and configuration for tools like black
+│
+├── references <- Data dictionaries, manuals, and all other explanatory materials.
+│
+├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
+│ └── figures <- Generated graphics and figures to be used in reporting
+│
+├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
+│ generated with `pip freeze > requirements.txt`
+│
+├── setup.cfg <- Configuration file for flake8
+│
+└── {{ cookiecutter.module_name }} <- Source code for use in this project.
+ │
+ ├── __init__.py <- Makes {{ cookiecutter.module_name }} a Python module
+ │
+ ├── config.py <- Store useful variables and configuration
+ │
+ ├── dataset.py <- Scripts to download or generate data
+ │
+ ├── features.py <- Code to turn raw data into features for modeling
+ │
+ ├── modeling <- Code to train models and run model inference
+ │ ├── __init__.py
+ │ ├── predict.py <- Code to run model inference with trained models
+ │ └── train.py <- Code to train models
+ │
+ └── plots.py <- Code to create visualizations
+```
--------
-
diff --git a/{{ cookiecutter.repo_name }}/src/data/.gitkeep b/{{ cookiecutter.repo_name }}/docs/.gitkeep
similarity index 100%
rename from {{ cookiecutter.repo_name }}/src/data/.gitkeep
rename to {{ cookiecutter.repo_name }}/docs/.gitkeep
diff --git a/{{ cookiecutter.repo_name }}/docs/Makefile b/{{ cookiecutter.repo_name }}/docs/Makefile
deleted file mode 100644
index 7dab2b193..000000000
--- a/{{ cookiecutter.repo_name }}/docs/Makefile
+++ /dev/null
@@ -1,153 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-PAPER =
-BUILDDIR = _build
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
- @echo "Please use \`make ' where is one of"
- @echo " html to make standalone HTML files"
- @echo " dirhtml to make HTML files named index.html in directories"
- @echo " singlehtml to make a single large HTML file"
- @echo " pickle to make pickle files"
- @echo " json to make JSON files"
- @echo " htmlhelp to make HTML files and a HTML help project"
- @echo " qthelp to make HTML files and a qthelp project"
- @echo " devhelp to make HTML files and a Devhelp project"
- @echo " epub to make an epub"
- @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
- @echo " latexpdf to make LaTeX files and run them through pdflatex"
- @echo " text to make text files"
- @echo " man to make manual pages"
- @echo " texinfo to make Texinfo files"
- @echo " info to make Texinfo files and run them through makeinfo"
- @echo " gettext to make PO message catalogs"
- @echo " changes to make an overview of all changed/added/deprecated items"
- @echo " linkcheck to check all external links for integrity"
- @echo " doctest to run all doctests embedded in the documentation (if enabled)"
-
-clean:
- -rm -rf $(BUILDDIR)/*
-
-html:
- $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
- $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
- @echo
- @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
- $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
- @echo
- @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
- $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
- @echo
- @echo "Build finished; now you can process the pickle files."
-
-json:
- $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
- @echo
- @echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
- $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
- @echo
- @echo "Build finished; now you can run HTML Help Workshop with the" \
- ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
- $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
- @echo
- @echo "Build finished; now you can run "qcollectiongenerator" with the" \
- ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/{{ cookiecutter.repo_name }}.qhcp"
- @echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/{{ cookiecutter.repo_name }}.qhc"
-
-devhelp:
- $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
- @echo
- @echo "Build finished."
- @echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/{{ cookiecutter.repo_name }}"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/{{ cookiecutter.repo_name }}"
- @echo "# devhelp"
-
-epub:
- $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
- @echo
- @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-latex:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo
- @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
- @echo "Run \`make' in that directory to run these through (pdf)latex" \
- "(use \`make latexpdf' here to do that automatically)."
-
-latexpdf:
- $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
- @echo "Running LaTeX files through pdflatex..."
- $(MAKE) -C $(BUILDDIR)/latex all-pdf
- @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-text:
- $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
- @echo
- @echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-man:
- $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
- @echo
- @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-texinfo:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo
- @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
- @echo "Run \`make' in that directory to run these through makeinfo" \
- "(use \`make info' here to do that automatically)."
-
-info:
- $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
- @echo "Running Texinfo files through makeinfo..."
- make -C $(BUILDDIR)/texinfo info
- @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-gettext:
- $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
- @echo
- @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-changes:
- $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
- @echo
- @echo "The overview file is in $(BUILDDIR)/changes."
-
-linkcheck:
- $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
- @echo
- @echo "Link check complete; look for any errors in the above output " \
- "or in $(BUILDDIR)/linkcheck/output.txt."
-
-doctest:
- $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
- @echo "Testing of doctests in the sources finished, look at the " \
- "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/{{ cookiecutter.repo_name }}/docs/commands.rst b/{{ cookiecutter.repo_name }}/docs/commands.rst
deleted file mode 100644
index 56e9e4ac7..000000000
--- a/{{ cookiecutter.repo_name }}/docs/commands.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Commands
-========
-
-The Makefile contains the central entry points for common tasks related to this project.
-
-Syncing data to S3
-^^^^^^^^^^^^^^^^^^
-
-* `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://{{ cookiecutter.s3_bucket }}/data/`.
-* `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://{{ cookiecutter.s3_bucket }}/data/` to `data/`.
diff --git a/{{ cookiecutter.repo_name }}/docs/conf.py b/{{ cookiecutter.repo_name }}/docs/conf.py
deleted file mode 100644
index ead8ef1f5..000000000
--- a/{{ cookiecutter.repo_name }}/docs/conf.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# {{ cookiecutter.project_name }} documentation build configuration file, created by
-# sphinx-quickstart.
-#
-# This file is execfile()d with the current directory set to its containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import os
-import sys
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-# sys.path.insert(0, os.path.abspath('.'))
-
-# -- General configuration -----------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = []
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix of source filenames.
-source_suffix = '.rst'
-
-# The encoding of source files.
-# source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'{{ cookiecutter.project_name }}'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = '0.1'
-# The full version, including alpha/beta/rc tags.
-release = '0.1'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-# language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-# today = ''
-# Else, today_fmt is used as the format for a strftime call.
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
-
-# The reST default role (used for this markup: `text`) to use for all documents.
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-
-# -- Options for HTML output ---------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = 'default'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents. If None, it defaults to
-# " v documentation".
-# html_title = None
-
-# A shorter title for the navigation bar. Default is the same as html_title.
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-# html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-# html_last_updated_fmt = '%b %d, %Y'
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-# html_domain_indices = True
-
-# If false, no index is generated.
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-# html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a tag referring to it. The value of this option must be the
-# base URL from which the finished HTML is served.
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = '{{ cookiecutter.repo_name }}doc'
-
-
-# -- Options for LaTeX output --------------------------------------------------
-
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- # 'preamble': '',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
-latex_documents = [
- ('index',
- '{{ cookiecutter.repo_name }}.tex',
- u'{{ cookiecutter.project_name }} Documentation',
- u"{{ cookiecutter.author_name }}", 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-# latex_appendices = []
-
-# If false, no module index is generated.
-# latex_domain_indices = True
-
-
-# -- Options for manual page output --------------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', '{{ cookiecutter.repo_name }}', u'{{ cookiecutter.project_name }} Documentation',
- [u"{{ cookiecutter.author_name }}"], 1)
-]
-
-# If true, show URL addresses after external links.
-# man_show_urls = False
-
-
-# -- Options for Texinfo output ------------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- ('index', '{{ cookiecutter.repo_name }}', u'{{ cookiecutter.project_name }} Documentation',
- u"{{ cookiecutter.author_name }}", '{{ cookiecutter.project_name }}',
- '{{ cookiecutter.description }}', 'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-# texinfo_show_urls = 'footnote'
diff --git a/{{ cookiecutter.repo_name }}/docs/index.rst b/{{ cookiecutter.repo_name }}/docs/index.rst
deleted file mode 100644
index 88e31e98b..000000000
--- a/{{ cookiecutter.repo_name }}/docs/index.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-.. {{ cookiecutter.project_name }} documentation master file, created by
- sphinx-quickstart.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-{{ cookiecutter.project_name }} documentation!
-==============================================
-
-Contents:
-
-.. toctree::
- :maxdepth: 2
-
- getting-started
- commands
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/{{ cookiecutter.repo_name }}/docs/make.bat b/{{ cookiecutter.repo_name }}/docs/make.bat
deleted file mode 100644
index aa1d05a27..000000000
--- a/{{ cookiecutter.repo_name }}/docs/make.bat
+++ /dev/null
@@ -1,190 +0,0 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
- set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
- set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
- :help
- echo.Please use `make ^` where ^ is one of
- echo. html to make standalone HTML files
- echo. dirhtml to make HTML files named index.html in directories
- echo. singlehtml to make a single large HTML file
- echo. pickle to make pickle files
- echo. json to make JSON files
- echo. htmlhelp to make HTML files and a HTML help project
- echo. qthelp to make HTML files and a qthelp project
- echo. devhelp to make HTML files and a Devhelp project
- echo. epub to make an epub
- echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
- echo. text to make text files
- echo. man to make manual pages
- echo. texinfo to make Texinfo files
- echo. gettext to make PO message catalogs
- echo. changes to make an overview over all changed/added/deprecated items
- echo. linkcheck to check all external links for integrity
- echo. doctest to run all doctests embedded in the documentation if enabled
- goto end
-)
-
-if "%1" == "clean" (
- for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
- del /q /s %BUILDDIR%\*
- goto end
-)
-
-if "%1" == "html" (
- %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/html.
- goto end
-)
-
-if "%1" == "dirhtml" (
- %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
- goto end
-)
-
-if "%1" == "singlehtml" (
- %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
- goto end
-)
-
-if "%1" == "pickle" (
- %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the pickle files.
- goto end
-)
-
-if "%1" == "json" (
- %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can process the JSON files.
- goto end
-)
-
-if "%1" == "htmlhelp" (
- %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
- goto end
-)
-
-if "%1" == "qthelp" (
- %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
- echo.^> qcollectiongenerator %BUILDDIR%\qthelp\{{ cookiecutter.repo_name }}.qhcp
- echo.To view the help file:
- echo.^> assistant -collectionFile %BUILDDIR%\qthelp\{{ cookiecutter.repo_name }}.ghc
- goto end
-)
-
-if "%1" == "devhelp" (
- %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished.
- goto end
-)
-
-if "%1" == "epub" (
- %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The epub file is in %BUILDDIR%/epub.
- goto end
-)
-
-if "%1" == "latex" (
- %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
- goto end
-)
-
-if "%1" == "text" (
- %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The text files are in %BUILDDIR%/text.
- goto end
-)
-
-if "%1" == "man" (
- %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The manual pages are in %BUILDDIR%/man.
- goto end
-)
-
-if "%1" == "texinfo" (
- %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
- goto end
-)
-
-if "%1" == "gettext" (
- %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
- if errorlevel 1 exit /b 1
- echo.
- echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
- goto end
-)
-
-if "%1" == "changes" (
- %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
- if errorlevel 1 exit /b 1
- echo.
- echo.The overview file is in %BUILDDIR%/changes.
- goto end
-)
-
-if "%1" == "linkcheck" (
- %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
- if errorlevel 1 exit /b 1
- echo.
- echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
- goto end
-)
-
-if "%1" == "doctest" (
- %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
- if errorlevel 1 exit /b 1
- echo.
- echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
- goto end
-)
-
-:end
diff --git a/{{ cookiecutter.repo_name }}/docs/mkdocs/README.md b/{{ cookiecutter.repo_name }}/docs/mkdocs/README.md
new file mode 100644
index 000000000..79c146859
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/docs/mkdocs/README.md
@@ -0,0 +1,12 @@
+Generating the docs
+----------
+
+Use the [mkdocs](http://www.mkdocs.org/) structure to update the documentation.
+
+Build locally with:
+
+ mkdocs build
+
+Serve locally with:
+
+ mkdocs serve
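+
+`mkdocs serve` starts a local development server (at http://127.0.0.1:8000 by default) that rebuilds the site whenever a page changes.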
diff --git a/{{ cookiecutter.repo_name }}/docs/getting-started.rst b/{{ cookiecutter.repo_name }}/docs/mkdocs/docs/getting-started.md
similarity index 100%
rename from {{ cookiecutter.repo_name }}/docs/getting-started.rst
rename to {{ cookiecutter.repo_name }}/docs/mkdocs/docs/getting-started.md
diff --git a/{{ cookiecutter.repo_name }}/docs/mkdocs/docs/index.md b/{{ cookiecutter.repo_name }}/docs/mkdocs/docs/index.md
new file mode 100644
index 000000000..fa6b7ed33
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/docs/mkdocs/docs/index.md
@@ -0,0 +1,23 @@
+# {{ cookiecutter.project_name }} documentation!
+{% if cookiecutter.description is not none %}
+## Description
+
+{{ cookiecutter.description }}
+{% endif %}
+## Commands
+
+The Makefile contains the central entry points for common tasks related to this project.
+{% if not cookiecutter.dataset_storage.none %}
+### Syncing data to cloud storage
+
+{% if cookiecutter.dataset_storage.s3 -%}
+* `make sync_data_up` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/`.
+* `make sync_data_down` will use `aws s3 sync` to recursively sync files from `s3://{{ cookiecutter.dataset_storage.s3.bucket }}/data/` to `data/`.
+{% elif cookiecutter.dataset_storage.azure -%}
+* `make sync_data_up` will use `az storage blob upload-batch -d` to recursively sync files in `data/` up to `{{ cookiecutter.dataset_storage.azure.container }}/data/`.
+* `make sync_data_down` will use `az storage blob download-batch` to recursively sync files from `{{ cookiecutter.dataset_storage.azure.container }}/data/` to `data/`.
+{% elif cookiecutter.dataset_storage.gcs -%}
+* `make sync_data_up` will use `gsutil rsync` to recursively sync files in `data/` up to `gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/`.
+* `make sync_data_down` will use `gsutil rsync` to recursively sync files from `gs://{{ cookiecutter.dataset_storage.gcs.bucket }}/data/` to `data/`.
+{% endif %}
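+
+Both targets assume the corresponding CLI (`aws`, `az`, or `gsutil`) is installed and authenticated; for example, run `make sync_data_up` after adding new files under `data/`.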
+{% endif %}
diff --git a/{{ cookiecutter.repo_name }}/docs/mkdocs/mkdocs.yml b/{{ cookiecutter.repo_name }}/docs/mkdocs/mkdocs.yml
new file mode 100644
index 000000000..77aa94c1a
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/docs/mkdocs/mkdocs.yml
@@ -0,0 +1,4 @@
+site_name: {{ cookiecutter.project_name }}
+# {% if cookiecutter.author_name %}
+site_author: {{ cookiecutter.author_name }}
+# {% endif %}
\ No newline at end of file
diff --git a/{{ cookiecutter.repo_name }}/pyproject.toml b/{{ cookiecutter.repo_name }}/pyproject.toml
new file mode 100644
index 000000000..5758a5781
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/pyproject.toml
@@ -0,0 +1,32 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = {{ cookiecutter.module_name|tojson }}
+version = "0.0.1"
+description = {{ cookiecutter.description|tojson }}
+authors = [
+ { name = {{ cookiecutter.author_name|tojson }} },
+]
+{% if cookiecutter.open_source_license != 'No license file' %}license = { file = "LICENSE" }{% endif %}
+readme = "README.md"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ {% if cookiecutter.open_source_license == 'MIT' %}"License :: OSI Approved :: MIT License"{% elif cookiecutter.open_source_license == 'BSD-3-Clause' %}"License :: OSI Approved :: BSD License"{% endif %}
+]
+requires-python = "~={{ cookiecutter.python_version_number }}"
+
+[tool.black]
+line-length = 99
+include = '\.pyi?$'
+exclude = '''
+/(
+ \.git
+ | \.venv
+)/
+'''
+
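+# Import-sorting behavior for ruff: treat the project module as first-party and
+# keep `import x` and `from x import y` statements sorted together in each block.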
+[tool.ruff.lint.isort]
+known-first-party = ["{{ cookiecutter.module_name }}"]
+force-sort-within-sections = true
diff --git a/{{ cookiecutter.repo_name }}/requirements.txt b/{{ cookiecutter.repo_name }}/requirements.txt
deleted file mode 100644
index 10a89cb6e..000000000
--- a/{{ cookiecutter.repo_name }}/requirements.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# local package
--e .
-
-# external requirements
-click
-Sphinx
-coverage
-awscli
-flake8
-python-dotenv>=0.5.1
-{% if cookiecutter.python_interpreter != 'python3' %}
-
-# backwards compatibility
-pathlib2
-{% endif %}
\ No newline at end of file
diff --git a/{{ cookiecutter.repo_name }}/setup.cfg b/{{ cookiecutter.repo_name }}/setup.cfg
new file mode 100644
index 000000000..7fb10f5d0
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/setup.cfg
@@ -0,0 +1,4 @@
+[flake8]
+ignore = E731,E266,E501,C901,W503
+max-line-length = 99
+exclude = .git,notebooks,references,models,data
diff --git a/{{ cookiecutter.repo_name }}/setup.py b/{{ cookiecutter.repo_name }}/setup.py
deleted file mode 100644
index 3fef006e2..000000000
--- a/{{ cookiecutter.repo_name }}/setup.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from setuptools import find_packages, setup
-
-setup(
- name='src',
- packages=find_packages(),
- version='0.1.0',
- description='{{ cookiecutter.description }}',
- author='{{ cookiecutter.author_name }}',
- license='{% if cookiecutter.open_source_license == 'MIT' %}MIT{% elif cookiecutter.open_source_license == 'BSD-3-Clause' %}BSD-3{% endif %}',
-)
diff --git a/{{ cookiecutter.repo_name }}/src/data/make_dataset.py b/{{ cookiecutter.repo_name }}/src/data/make_dataset.py
deleted file mode 100644
index 96b377a23..000000000
--- a/{{ cookiecutter.repo_name }}/src/data/make_dataset.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-import click
-import logging
-from pathlib import Path
-from dotenv import find_dotenv, load_dotenv
-
-
-@click.command()
-@click.argument('input_filepath', type=click.Path(exists=True))
-@click.argument('output_filepath', type=click.Path())
-def main(input_filepath, output_filepath):
- """ Runs data processing scripts to turn raw data from (../raw) into
- cleaned data ready to be analyzed (saved in ../processed).
- """
- logger = logging.getLogger(__name__)
- logger.info('making final data set from raw data')
-
-
-if __name__ == '__main__':
- log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
- logging.basicConfig(level=logging.INFO, format=log_fmt)
-
- # not used in this stub but often useful for finding various files
- project_dir = Path(__file__).resolve().parents[2]
-
- # find .env automagically by walking up directories until it's found, then
- # load up the .env entries as environment variables
- load_dotenv(find_dotenv())
-
- main()
diff --git a/{{ cookiecutter.repo_name }}/src/features/.gitkeep b/{{ cookiecutter.repo_name }}/src/features/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/features/build_features.py b/{{ cookiecutter.repo_name }}/src/features/build_features.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/models/.gitkeep b/{{ cookiecutter.repo_name }}/src/models/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/models/__init__.py b/{{ cookiecutter.repo_name }}/src/models/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/models/predict_model.py b/{{ cookiecutter.repo_name }}/src/models/predict_model.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/models/train_model.py b/{{ cookiecutter.repo_name }}/src/models/train_model.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/visualization/.gitkeep b/{{ cookiecutter.repo_name }}/src/visualization/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/visualization/__init__.py b/{{ cookiecutter.repo_name }}/src/visualization/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/src/visualization/visualize.py b/{{ cookiecutter.repo_name }}/src/visualization/visualize.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/{{ cookiecutter.repo_name }}/test_environment.py b/{{ cookiecutter.repo_name }}/test_environment.py
deleted file mode 100644
index 0b0abeaa1..000000000
--- a/{{ cookiecutter.repo_name }}/test_environment.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-
-REQUIRED_PYTHON = "{{ cookiecutter.python_interpreter }}"
-
-
-def main():
- system_major = sys.version_info.major
- if REQUIRED_PYTHON == "python":
- required_major = 2
- elif REQUIRED_PYTHON == "python3":
- required_major = 3
- else:
- raise ValueError("Unrecognized python interpreter: {}".format(
- REQUIRED_PYTHON))
-
- if system_major != required_major:
- raise TypeError(
- "This project requires Python {}. Found: Python {}".format(
- required_major, sys.version))
- else:
- print(">>> Development environment passes all tests!")
-
-
-if __name__ == '__main__':
- main()
diff --git a/{{ cookiecutter.repo_name }}/tox.ini b/{{ cookiecutter.repo_name }}/tox.ini
deleted file mode 100644
index c32fbd859..000000000
--- a/{{ cookiecutter.repo_name }}/tox.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-max-line-length = 79
-max-complexity = 10
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py
new file mode 100644
index 000000000..9680e9410
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/__init__.py
@@ -0,0 +1 @@
+from {{ cookiecutter.module_name }} import config  # noqa: F401
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py
new file mode 100644
index 000000000..8d77c2297
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/config.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+
+from dotenv import load_dotenv
+from loguru import logger
+
+# Load environment variables from .env file if it exists
+load_dotenv()
+
+# Paths
+PROJ_ROOT = Path(__file__).resolve().parents[1]
+logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
+
+DATA_DIR = PROJ_ROOT / "data"
+RAW_DATA_DIR = DATA_DIR / "raw"
+INTERIM_DATA_DIR = DATA_DIR / "interim"
+PROCESSED_DATA_DIR = DATA_DIR / "processed"
+EXTERNAL_DATA_DIR = DATA_DIR / "external"
+
+MODELS_DIR = PROJ_ROOT / "models"
+
+REPORTS_DIR = PROJ_ROOT / "reports"
+FIGURES_DIR = REPORTS_DIR / "figures"
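+
+# These path constants are meant to be imported by the other modules, e.g.
+# `from {{ cookiecutter.module_name }}.config import RAW_DATA_DIR` in dataset.py.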
+
+# If tqdm is installed, configure loguru with tqdm.write
+# https://github.com/Delgan/loguru/issues/135
+try:
+ from tqdm import tqdm
+
+ logger.remove(0)
+ logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+except ModuleNotFoundError:
+ pass
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py
new file mode 100644
index 000000000..04f3ed6e3
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/dataset.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
+
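+# Typer turns the decorated main() below into a small command-line interface.
+# With the project package importable, this stub can be run as, for example,
+# `python -m {{ cookiecutter.module_name }}.dataset --help` to list its options.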
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ input_path: Path = RAW_DATA_DIR / "dataset.csv",
+ output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+ # ----------------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Processing dataset...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Processing dataset complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py
new file mode 100644
index 000000000..20da88b85
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/features.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+ output_path: Path = PROCESSED_DATA_DIR / "features.csv",
+ # -----------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Generating features from dataset...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Features generation complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()
diff --git a/{{ cookiecutter.repo_name }}/src/features/__init__.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/__init__.py
similarity index 100%
rename from {{ cookiecutter.repo_name }}/src/features/__init__.py
rename to {{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/__init__.py
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py
new file mode 100644
index 000000000..e0efdd430
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/predict.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
+ model_path: Path = MODELS_DIR / "model.pkl",
+ predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
+ # -----------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Performing inference for model...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Inference complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py
new file mode 100644
index 000000000..4c6ee8f4b
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/modeling/train.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import MODELS_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ features_path: Path = PROCESSED_DATA_DIR / "features.csv",
+ labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
+ model_path: Path = MODELS_DIR / "model.pkl",
+ # -----------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Training some model...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Modeling training complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()
diff --git a/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py
new file mode 100644
index 000000000..dad5c0cea
--- /dev/null
+++ b/{{ cookiecutter.repo_name }}/{{ cookiecutter.module_name }}/plots.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import typer
+from loguru import logger
+from tqdm import tqdm
+
+from {{ cookiecutter.module_name }}.config import FIGURES_DIR, PROCESSED_DATA_DIR
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
+ output_path: Path = FIGURES_DIR / "plot.png",
+ # -----------------------------------------
+):
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
+ logger.info("Generating plot from data...")
+ for i in tqdm(range(10), total=10):
+ if i == 5:
+ logger.info("Something happened for iteration 5.")
+ logger.success("Plot generation complete.")
+ # -----------------------------------------
+
+
+if __name__ == "__main__":
+ app()