From eb85b718d8ba0f68a05a1e69a3d98fb4bff9f94e Mon Sep 17 00:00:00 2001 From: alanmottaganem Date: Tue, 18 May 2021 12:11:23 -0300 Subject: [PATCH] first commit --- .gitignore | 175 ++++++++++ CONTRIBUTING.md | 33 ++ LICENSE | 201 +++++++++++ MANIFEST.in | 5 + Makefile | 37 ++ README.md | 5 + analysis/Data Analysis Template.ipynb | 70 ++++ data/.gitkeep | 0 docker-compose.yml | 36 ++ docs/.gitignore | 1 + docs/Gemfile | 9 + docs/Gemfile.lock | 269 +++++++++++++++ docs/_config.yml | 66 ++++ docs/_data/topnav.yml | 10 + docs/feed.xml | 32 ++ docs/sitemap.xml | 24 ++ html_utf8encode.bat | 1 + index.ipynb | 382 +++++++++++++++++++++ log/.gitkeep | 0 notebooks_dev/DE_00_extract.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DE_01_transform.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DE_02_load.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DS_00_load.ipynb | 395 ++++++++++++++++++++++ notebooks_dev/DS_01_preprocess.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DS_02_build_features.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DS_03_modelling.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DS_04_validate.ipynb | 394 +++++++++++++++++++++ notebooks_dev/DS_05_postprocess.ipynb | 394 +++++++++++++++++++++ notebooks_dev/MLE_00_pipeline_utils.ipynb | 394 +++++++++++++++++++++ notebooks_dev/MLE_01_serving.ipynb | 394 +++++++++++++++++++++ pipeline-scripts/Setup.py | 0 requirements.txt | 0 settings.ini | 77 +++++ setup.py | 47 +++ src/.gitkeep | 0 src/__init__.py | 1 + src/_nbdev.py | 24 ++ src/de__extract.py | 34 ++ src/de__load.py | 34 ++ src/de__transform.py | 34 ++ src/ds__build_features.py | 34 ++ src/ds__load.py | 34 ++ src/ds__modelling.py | 34 ++ src/ds__postprocess.py | 34 ++ src/ds__preprocess.py | 34 ++ src/ds__validate.py | 34 ++ src/mle__pipeline_utils.py | 34 ++ src/mle__serving.py | 34 ++ utf8encode.py | 33 ++ 49 files changed, 6247 insertions(+) create mode 100644 .gitignore create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 
100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 analysis/Data Analysis Template.ipynb create mode 100644 data/.gitkeep create mode 100644 docker-compose.yml create mode 100644 docs/.gitignore create mode 100644 docs/Gemfile create mode 100644 docs/Gemfile.lock create mode 100644 docs/_config.yml create mode 100644 docs/_data/topnav.yml create mode 100644 docs/feed.xml create mode 100644 docs/sitemap.xml create mode 100644 html_utf8encode.bat create mode 100644 index.ipynb create mode 100644 log/.gitkeep create mode 100644 notebooks_dev/DE_00_extract.ipynb create mode 100644 notebooks_dev/DE_01_transform.ipynb create mode 100644 notebooks_dev/DE_02_load.ipynb create mode 100644 notebooks_dev/DS_00_load.ipynb create mode 100644 notebooks_dev/DS_01_preprocess.ipynb create mode 100644 notebooks_dev/DS_02_build_features.ipynb create mode 100644 notebooks_dev/DS_03_modelling.ipynb create mode 100644 notebooks_dev/DS_04_validate.ipynb create mode 100644 notebooks_dev/DS_05_postprocess.ipynb create mode 100644 notebooks_dev/MLE_00_pipeline_utils.ipynb create mode 100644 notebooks_dev/MLE_01_serving.ipynb create mode 100644 pipeline-scripts/Setup.py create mode 100644 requirements.txt create mode 100644 settings.ini create mode 100644 setup.py create mode 100644 src/.gitkeep create mode 100644 src/__init__.py create mode 100644 src/_nbdev.py create mode 100644 src/de__extract.py create mode 100644 src/de__load.py create mode 100644 src/de__transform.py create mode 100644 src/ds__build_features.py create mode 100644 src/ds__load.py create mode 100644 src/ds__modelling.py create mode 100644 src/ds__postprocess.py create mode 100644 src/ds__preprocess.py create mode 100644 src/ds__validate.py create mode 100644 src/mle__pipeline_utils.py create mode 100644 src/mle__serving.py create mode 100644 utf8encode.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4c9326 --- /dev/null +++ b/.gitignore @@ -0,0 
+1,175 @@ +### custom gitignore + +# Python secondary files +__pycache__/ +*.py[cod] +*$py.class +.analysis/.ipynb_checkpoints + +# VS Code config file +.vscode/ + +# Datasets +data/* +!data/.gitkeep +data/input/* +data/output/* +!data/input/.gitkeep +!data/output/.gitkeep +*.csv +*.xlsx +*.xlsb +*.xls + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + + +### nbdev default .gitignore +*.bak +.gitattributes +.last_checked +.gitconfig +*.bak +*.log +*~ +~* +_tmp* +tmp* +tags + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +.vscode +*.swp + +# osx generated files +.DS_Store +.DS_Store? 
+.Trashes +ehthumbs.db +Thumbs.db +.idea + +# pytest +.pytest_cache + +# tools/trust-doc-nbs +docs_src/.last_checked + +# symlinks to fastai +docs_src/fastai +tools/fastai + +# link checker +checklink/cookies.txt + +# .gitconfig is now autogenerated +.gitconfig + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..38fbde5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,33 @@ +# How to contribute + +## How to get started + +Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it: +``` +nbdev_install_git_hooks +``` + +## Did you find a bug? + +* Ensure the bug was not already reported by searching on GitHub under Issues. +* If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring. +* Be sure to add the complete error messages. + +#### Did you write a patch that fixes a bug? + +* Open a new GitHub pull request with the patch. +* Ensure that your PR includes a test that fails without your patch, and passes with it. +* Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. + +## PR submission guidelines + +* Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needed to keep each PR focused. +* Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs and they will most likely get rejected. +* Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can. +* Do not turn an already submitted PR into your development playground. 
If, after submitting a PR, you discover that more work is needed, close the PR, do the required work and then submit a new PR. Otherwise each of your commits requires attention from maintainers of the project. +* If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exceptional case where you realize it'll take many, many commits to complete the requests, then it's probably best to close the PR, do the work and then submit it again. Use common sense where you'd choose one way over another. + +## Do you want to contribute to the documentation? + +* Docs are automatically created from the notebooks in the notebooks_dev folder. + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..5c0e7ce --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include settings.ini +include LICENSE +include CONTRIBUTING.md +include README.md +recursive-exclude * __pycache__ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..75e3a52 --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +.ONESHELL: +SHELL := /bin/bash +SRC = $(wildcard notebooks_dev/*.ipynb) + +all: src docs + +src: $(SRC) + nbdev_build_lib + touch src + +sync: + nbdev_update_lib + +docs_serve: docs + cd docs && bundle exec jekyll serve + +docs: $(SRC) + nbdev_build_docs + touch docs + +test: + nbdev_test_nbs + +release: pypi conda_release + nbdev_bump_version + +conda_release: + fastrelease_conda_package + +pypi: dist + twine upload --repository pypi dist/* + +dist: clean + python setup.py sdist bdist_wheel + +clean: + rm -rf dist \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..244db22 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# nbdev template + +Use this template to more easily create your [nbdev](https://nbdev.fast.ai/) project. 
+ +_If you are using an older version of this template, and want to upgrade to the theme-based version, see [this helper script](https://gist.github.com/hamelsmu/977e82a23dcd8dcff9058079cb4a8f18) (more explanation of what this means is contained in the link to the script)_. diff --git a/analysis/Data Analysis Template.ipynb b/analysis/Data Analysis Template.ipynb new file mode 100644 index 0000000..8c005f0 --- /dev/null +++ b/analysis/Data Analysis Template.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# this folder should contain analysis/EDA/scratch notebooks, since they will be included in neither `docs` nor `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/data/.gitkeep b/data/.gitkeep new file 
mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d9b2c52 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,36 @@ +version: "3" +services: + fastai: &fastai + restart: unless-stopped + working_dir: /data + image: fastai/codespaces + logging: + driver: json-file + options: + max-size: 50m + stdin_open: true + tty: true + volumes: + - .:/data/ + + notebook: + <<: *fastai + command: bash -c "pip install -e . && jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port=8080 --NotebookApp.token='' --NotebookApp.password=''" + ports: + - "8080:8080" + + watcher: + <<: *fastai + command: watchmedo shell-command --command nbdev_build_docs --pattern *.ipynb --recursive --drop + network_mode: host # for GitHub Codespaces https://github.com/features/codespaces/ + + jekyll: + <<: *fastai + ports: + - "4000:4000" + command: > + bash -c "cp -r docs_src docs + && pip install . + && nbdev_build_docs && cd docs + && bundle i + && chmod -R u+rwx . 
&& bundle exec jekyll serve --host 0.0.0.0" diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..57510a2 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_site/ diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 0000000..f2509a4 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,9 @@ +source "https://rubygems.org" + +gem 'github-pages', group: :jekyll_plugins + +# Added at 2019-11-25 10:11:40 -0800 by jhoward: +gem "nokogiri", "< 1.11.1" +gem "jekyll", ">= 3.7" +gem "kramdown", ">= 2.3.0" +gem "jekyll-remote-theme" diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock new file mode 100644 index 0000000..271bfeb --- /dev/null +++ b/docs/Gemfile.lock @@ -0,0 +1,269 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (6.0.3.4) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + zeitwerk (~> 2.2, >= 2.2.2) + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.11.1) + colorator (1.1.0) + commonmarker (0.17.13) + ruby-enum (~> 0.5) + concurrent-ruby (1.1.7) + dnsruby (1.61.5) + simpleidn (~> 0.1) + em-websocket (0.5.2) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0.6.0) + ethon (0.12.0) + ffi (>= 1.3.0) + eventmachine (1.2.7) + execjs (2.7.0) + faraday (1.3.0) + faraday-net_http (~> 1.0) + multipart-post (>= 1.2, < 3) + ruby2_keywords + faraday-net_http (1.0.1) + ffi (1.14.2) + forwardable-extended (2.6.0) + gemoji (3.0.1) + github-pages (209) + github-pages-health-check (= 1.16.1) + jekyll (= 3.9.0) + jekyll-avatar (= 0.7.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.1.6) + jekyll-default-layout (= 0.1.4) + jekyll-feed (= 0.15.1) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.13.0) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + 
jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.2) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.6.1) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.1.1) + jekyll-theme-cayman (= 0.1.1) + jekyll-theme-dinky (= 0.1.1) + jekyll-theme-hacker (= 0.1.2) + jekyll-theme-leap-day (= 0.1.1) + jekyll-theme-merlot (= 0.1.1) + jekyll-theme-midnight (= 0.1.1) + jekyll-theme-minimal (= 0.1.1) + jekyll-theme-modernist (= 0.1.1) + jekyll-theme-primer (= 0.5.4) + jekyll-theme-slate (= 0.1.1) + jekyll-theme-tactile (= 0.1.1) + jekyll-theme-time-machine (= 0.1.1) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.12.0) + kramdown (= 2.3.0) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.3) + mercenary (~> 0.3) + minima (= 2.5.1) + nokogiri (>= 1.10.4, < 2.0) + rouge (= 3.23.0) + terminal-table (~> 1.4) + github-pages-health-check (1.16.1) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (~> 4.0) + public_suffix (~> 3.0) + typhoeus (~> 1.3) + html-pipeline (2.14.0) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.6.0) + i18n (0.9.5) + concurrent-ruby (~> 1.0) + jekyll (3.9.0) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 0.7) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + jekyll-avatar (0.7.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.1.1) + coffee-script (~> 2.2) + coffee-script-source (~> 1.11.1) + jekyll-commonmark (1.3.1) + commonmarker (~> 0.14) + jekyll (>= 3.7, < 5.0) + jekyll-commonmark-ghpages (0.1.6) + commonmarker (~> 0.17.6) + jekyll-commonmark (~> 1.2) + rouge (>= 2.0, < 4.0) + jekyll-default-layout (0.1.4) + jekyll (~> 3.0) + jekyll-feed (0.15.1) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.13.0) + jekyll (>= 3.4, < 5.0) + octokit (~> 4.0, != 4.4.0) + 
jekyll-mentions (1.6.0) + html-pipeline (~> 2.3) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.2) + addressable (~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag (2.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.1.2) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.5.4) + jekyll (> 3.5, < 5.0) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.1.1) + jekyll (~> 3.5) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.12.0) + gemoji (~> 3.0) + html-pipeline (~> 2.2) + jekyll (>= 3.0, < 5.0) + kramdown (2.3.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.3) + listen (3.4.0) + 
rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.3.6) + mini_portile2 (2.5.0) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.14.3) + multipart-post (2.1.1) + nokogiri (1.11.0) + mini_portile2 (~> 2.5.0) + racc (~> 1.4) + octokit (4.20.0) + faraday (>= 0.9) + sawyer (~> 0.8.0, >= 0.5.3) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (3.1.1) + racc (1.5.2) + rb-fsevent (0.10.4) + rb-inotify (0.10.1) + ffi (~> 1.0) + rexml (3.2.4) + rouge (3.23.0) + ruby-enum (0.8.0) + i18n + ruby2_keywords (0.0.2) + rubyzip (2.3.0) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.8.2) + addressable (>= 2.3.5) + faraday (> 0.8, < 2.0) + simpleidn (0.1.1) + unf (~> 0.1.4) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + thread_safe (0.3.6) + typhoeus (1.4.0) + ethon (>= 0.9.0) + tzinfo (1.2.9) + thread_safe (~> 0.1) + unf (0.1.4) + unf_ext + unf_ext (0.0.7.7) + unicode-display_width (1.7.0) + zeitwerk (2.4.2) + +PLATFORMS + ruby + +DEPENDENCIES + github-pages + jekyll (>= 3.7) + jekyll-remote-theme + kramdown (>= 2.3.0) + nokogiri (< 1.11.1) + +BUNDLED WITH + 2.1.4 diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..7ebbc15 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,66 @@ +repository: {user}/src +output: web +topnav_title: {repo_name} +site_title: {repo_name} +company_name: {copyright} +description: {description} +# Set to false to disable KaTeX math +use_math: true +# Add Google analytics id if you have one and want to use it here +google_analytics: +# See http://nbdev.fast.ai/search for help with adding Search +google_search: + +host: 127.0.0.1 +# the preview server used. Leave as is. +port: 4000 +# the port where the preview is rendered. 
+ +exclude: + - .idea/ + - .gitignore + - vendor + +exclude: [vendor] + +highlighter: rouge +markdown: kramdown +kramdown: + input: GFM + auto_ids: true + hard_wrap: false + syntax_highlighter: rouge + +collections: + tooltips: + output: false + +defaults: + - + scope: + path: "" + type: "pages" + values: + layout: "page" + comments: true + search: true + sidebar: home_sidebar + topnav: topnav + - + scope: + path: "" + type: "tooltips" + values: + layout: "page" + comments: true + search: true + tooltip: true + +sidebars: +- home_sidebar + +plugins: + - jekyll-remote-theme + +remote_theme: fastai/nbdev-jekyll-theme +baseurl: /{repo_name}/src/ \ No newline at end of file diff --git a/docs/_data/topnav.yml b/docs/_data/topnav.yml new file mode 100644 index 0000000..680ffb5 --- /dev/null +++ b/docs/_data/topnav.yml @@ -0,0 +1,10 @@ +topnav: +- title: Topnav + items: + - title: github + external_url: https://github.com/{user}/src/tree/{branch}/ + +#Topnav dropdowns +topnav_dropdowns: +- title: Topnav dropdowns + folders: \ No newline at end of file diff --git a/docs/feed.xml b/docs/feed.xml new file mode 100644 index 0000000..d8d6ac9 --- /dev/null +++ b/docs/feed.xml @@ -0,0 +1,32 @@ +--- +search: exclude +layout: none +--- + + + + + {{ site.title | xml_escape }} + {{ site.description | xml_escape }} + {{ site.url }}/ + + {{ site.time | date_to_rfc822 }} + {{ site.time | date_to_rfc822 }} + Jekyll v{{ jekyll.version }} + {% for post in site.posts limit:10 %} + + {{ post.title | xml_escape }} + {{ post.content | xml_escape }} + {{ post.date | date_to_rfc822 }} + {{ post.url | prepend: site.url }} + {{ post.url | prepend: site.url }} + {% for tag in post.tags %} + {{ tag | xml_escape }} + {% endfor %} + {% for tag in page.tags %} + {{ cat | xml_escape }} + {% endfor %} + + {% endfor %} + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..38a04d6 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,24 @@ +--- +layout: none +search: exclude 
+--- + + + + {% for post in site.posts %} + {% unless post.search == "exclude" %} + + {{site.url}}{{post.url}} + + {% endunless %} + {% endfor %} + + + {% for page in site.pages %} + {% unless page.search == "exclude" %} + + {{site.url}}{{ page.url}} + + {% endunless %} + {% endfor %} + \ No newline at end of file diff --git a/html_utf8encode.bat b/html_utf8encode.bat new file mode 100644 index 0000000..0ddff8d --- /dev/null +++ b/html_utf8encode.bat @@ -0,0 +1 @@ +"python" "%~dp0/utf8encode.py" \ No newline at end of file diff --git a/index.ipynb b/index.ipynb new file mode 100644 index 0000000..5e85b73 --- /dev/null +++ b/index.ipynb @@ -0,0 +1,382 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# [`nbdev`](https://nbdev.fast.ai/) Data Science template\n", + "> This template merges [`cookiecutter`](https://drivendata.github.io/cookiecutter-data-science/)'s data science project template with the [`nbdev`](https://nbdev.fast.ai/) software development tool. Some other features of the folder structure are also included, based on this developer's experience with data science projects." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# README.md Template below:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## include code usage example in readme/index - THIS IS TEMPLATE CONTENT - " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#import parts of your code in order to show examples\n", + "from src.ds__load import Class\n", + "from src.ds__preprocess import func" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can include code snippets on your index (README.md) simply like this:\n", + "cls_func = Class(func)\n", + "cls_func.apply(1,5)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# another example\n", + "from functools import reduce\n", + "prod_function = lambda x: reduce(lambda x1,x2 : x1*x2, x)\n", + "cls_prod = Class(prod_function)\n", + "cls_prod.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# < Analytics Project Name >\n", + "\n", + "> The goal of the project is to ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checklist\n", + "\n", + "Mark which tasks have been performed\n", + "\n", + "- [ ] **Summary:** you have included a description, usage, output, accuracy and metadata of your model.\n", + "- [ ] **Pre-processing:** you have applied pre-processing to your data and this function is reproducible to new datasets.\n", + "- [ ] **Feature selection:** you have performed feature selection while modeling.\n", + "- [ ] **Modeling dataset creation:** you have well-defined and reproducible code to generate a modeling dataset that reproduces the behavior of the target dataset. This pipeline is also applicable to generate the deploy dataset.\n", + "- [ ] **Model selection:** you have chosen a suitable model according to the project specification.\n", + "- [ ] **Model validation:** you have validated your model according to the project specification.\n", + "- [ ] **Model optimization:** you have defined functions to optimize hyper-parameters and they are reproducible.\n", + "- [ ] **Peer-review:** your code and results have been verified by your colleagues and pre-approved by them.\n", + "- [ ] **Acceptance:** this model report has been accepted by the Data Science Manager. State name and date.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "The model is designed to ... (state a simple sentence here to indicate what your model does)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Describe step-by-step what should be done to run the algorithm, as in the example below:\n", + "\n", + "1.\tDownload the database from `` and place it in an accessible folder on your machine\n", + "2.\tClone this repository to your machine\n", + "3.\tUpdate the `path` variable in main, to the path chosen on step 1. 
\n", + "3.\tMake sure Python 3.6 is installed on your machine\n", + "4.\tInstall all libraries on `requirements.txt` using the command:\n", + "\n", + "`pip install -r requirements.txt`\n", + "\n", + "5.\tRun the command `python src/main.py` \n", + "6.\tCheck the results on the `output` directory\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Output\n", + "\n", + "Clarify what are the files generated as output, with their names, paths and a brief description of the data structure (in case it is a CSV for example)\n", + "\n", + "Example:\n", + "\n", + "The model outputs a list called `.csv` onto `` and contains the following variables:\n", + "\n", + "- var1\n", + "- var2\n", + "- var3\n", + "- var4\n", + "- var5\n", + "- var6\n", + "\n", + "In the future, the Score variable (from the Credit risk algorithm) shall also be automatically merged onto the output file.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## [Metadata](docs/project_metadata.json)\n", + "\n", + "Here you should go the project metadata dictionary (written in JSON), as the file describes\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance Metrics\n", + "\n", + "The project will be followed by a dashboard, available as a source code on this repo.\n", + "The model is considered to be drifting when there are no visible distinction between clusters and/or when the KPI's are below the goal settled for the year.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre-processing\n", + "\n", + "Here should be stated in a step-by-step way what was done in the pre-processing stage of the project.\n", + "\n", + "Example:\n", + "\n", + "1.\tExcluded readings with `` from ``: bad lines.\n", + "2.\tLimited `` to 4, which represents Bank payroll.\n", + "3.\tExcludes `` readings with more than 30 days in advance, in 
case they represent more 1%. \n", + "4.\tStacked all invoices by customer with the mean of each variable (although in some cases other aggregation functions are used instead).\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature selection\n", + "\n", + "Here should be explained how the features used for the model were selected, if any feature selection methodology was implemented and so on.\n", + "\n", + "Example:\n", + "\n", + "The feature selection methodology used was the Forward Selection. Also, some variables used on the Credit risk model were also considered, up until when the model reached its optimal performance. PCA and RFE did not present better results so far.\n", + "\n", + "**Features used**\n", + "\n", + "Here should be placed the features used to run the model entirely.\n", + "\n", + "Example:\n", + "\n", + "The features used to train both the clustering and classifier can be found under `models/config`, with the static variable `COLS_TO_TRAIN`.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modeling\n", + "\n", + "Here goes a better explanation on what algorithm was chosen and how does the complete model work.\n", + "\n", + "Example:\n", + "\n", + "To identify the clusters, the K-Means algorithm was chosen. A Random Forest Classifier from scikit-learn was then trained to predict the clusters for new customers and also customers with updated variable values. This pipeline has shown a good performance, running under 5 minutes and with satisfactory results.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model selection\n", + "\n", + "Here should be stated a clarification on how it was decided to choose the running algorithm, and why not other ones. 
If any future implementations are envisioned, a brief statement should be made as well.\n", + "\n", + "Example:\n", + "\n", + "Although K-Medians normally performs better with outliers, K-Means still has shown more consistent results and therefore was decided to be used. K-Modes and K-Prototypes were also tested, for including categorical variables, but also had not either reached a good Silhouette Score or divided the data in a meaningful way. Spectral Clustering presented good results for a sample dataset, but has shown poor performance for the complete dataset, and still has to be studied for the next version. DBSCAN was considered, but since it neglects the outliers, it was then decided to be discarded. \n", + "\n", + "For the classification algorithm, Random Forest outperformed the Decision Tree and the XGBoost classifiers.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model validation\n", + "\n", + "A step-by-step explanation on what was done to choose the running model. The metrics measured to select them are also desired to be placed in the steps.\n", + "\n", + "Example:\n", + "\n", + "1.\tThe cluster numbers were chosen based on the Silhouette Score of 0,85.\n", + "2.\tThe dataset was sampled to ensure the clustering consistency.\n", + "3.\tThe clustered data was used as input for the Random Forest Classifier\n", + "4.\tThe data was split into train and test (80/20).\n", + "5.\tThe classifier reached the Accuracy Score of 0,96.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model optimization\n", + "\n", + "Here should be explained if any kind of optimization of the model was made.\n", + "\n", + "Example:\n", + "\n", + "The hyper-parameters `n_estimators` and `max_depth` were manually optimized for the classifier, although there is still room for improvement, through GridSearch, Genetic Algorithms, etc. 
\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Drifting and Retraining\n", + "\n", + "Here should be explained what are the necessary steps done in order to retrain the model, whenever some kind of drifting or underperformance is noticed.\n", + "\n", + "Example:\n", + "\n", + "Whenever drifting is noticed, lines 40, 41, 47, 56, 60 and 61 need to be uncommented on `main.py` before running the model. Once it is done, they need to be commented again. In future versions, this process is ought to be automated.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Foreseen improvements\n", + "\n", + "If any future improvements are identified during any of the steps, they should be pointed out in this section.\n", + "\n", + "Example:\n", + "\n", + "- New features to be engineered in the next versions can potentially enhance clustering or recommendations. For example, it is still to be tested whether there is a type of customer that only pays back on a specific time of the month or week, and therefore they won't show in the recommendations list when they don't pay. 
\n", + "\n", + "- Treating the (many) outliers with some other measures, could potentially enhance predictions.\n", + "\n", + "- Automatically gather data and run the model is something to be worked on for future versions" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/notebooks_dev/DE_00_extract.ipynb b/notebooks_dev/DE_00_extract.ipynb new file mode 100644 index 0000000..bfda83b --- /dev/null +++ b/notebooks_dev/DE_00_extract.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in wihch `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp de__extract" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `notebooks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Engineering Extract\n", + "> Module containing data extracting functionalities to be used in the Data Engineering pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session concainning code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpfull for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DE_01_transform.ipynb b/notebooks_dev/DE_01_transform.ipynb new file mode 100644 index 0000000..2e538af --- /dev/null +++ b/notebooks_dev/DE_01_transform.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in wihch `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp de__transform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Engineering Transform\n", + "> Module containing data transformation functionalities to be used in the Data Engineering pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule 
is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session concainning code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. 
Helpfull for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": 
false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DE_02_load.ipynb b/notebooks_dev/DE_02_load.ipynb new file mode 100644 index 0000000..54d0d6f --- /dev/null +++ b/notebooks_dev/DE_02_load.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in wihch `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp de__load" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Engineering Load\n", + "> Module containing data loading functionalities to be used in the Data Engineering pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that sums a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + {
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend creating code from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceed, please create your new code below this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DS_00_load.ipynb b/notebooks_dev/DS_00_load.ipynb new file mode 100644 index 0000000..14fadc1 --- /dev/null +++ b/notebooks_dev/DS_00_load.ipynb @@ -0,0 +1,395 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__load" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Load\n", + "> Module containing data loading functionalities to be used in the Data Science pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly 
explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that sums a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15,
+ "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend creating code from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceed, please create your new code below this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook.
Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title":
false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} + diff --git a/notebooks_dev/DS_01_preprocess.ipynb b/notebooks_dev/DS_01_preprocess.ipynb new file mode 100644 index 0000000..03821f3 --- /dev/null +++ b/notebooks_dev/DS_01_preprocess.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__preprocess" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Preprocess\n", + "> Module containing data preprocessing functionalities to be used in the Data Science pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that sums a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + {
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend creating code from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceed, please create your new code below this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DS_02_build_features.ipynb b/notebooks_dev/DS_02_build_features.ipynb new file mode 100644 index 0000000..e68d308 --- /dev/null +++ b/notebooks_dev/DS_02_build_features.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__build_features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Build Features\n", + "> Module containing feature engineering functionalities to be used in the Data Science pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is 
now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that sums a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", +
"execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend creating code from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceed, please create your new code below this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook.
Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title":
false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DS_03_modelling.ipynb b/notebooks_dev/DS_03_modelling.ipynb new file mode 100644 index 0000000..162e52f --- /dev/null +++ b/notebooks_dev/DS_03_modelling.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__modelling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `noteboks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Modelling\n", + "> Module containing modelling/model functionalities and classes to be used in the Data Science pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DS_04_validate.ipynb b/notebooks_dev/DS_04_validate.ipynb new file mode 100644 index 0000000..82677d5 --- /dev/null +++ b/notebooks_dev/DS_04_validate.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__validate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `notebooks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Validate\n", + "> Module containing model validation functionalities to be used in the Data Science pipelines" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now
clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook.
Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title":
false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/DS_05_postprocess.ipynb b/notebooks_dev/DS_05_postprocess.ipynb new file mode 100644 index 0000000..9097686 --- /dev/null +++ b/notebooks_dev/DS_05_postprocess.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp ds__postprocess" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `notebooks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science Postprocess\n", + "> Module intended for creating model output postprocessing routines, prior to serving or anything downstream to inference. E.g.: design AB testings..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/MLE_00_pipeline_utils.ipynb b/notebooks_dev/MLE_00_pipeline_utils.ipynb new file mode 100644 index 0000000..09fe9f0 --- /dev/null +++ b/notebooks_dev/MLE_00_pipeline_utils.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp mle__pipeline_utils" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `notebooks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine Learning Engineering Pipeline Utils\n", + "> Module intended to be used by the Machine Learning Engineering team for building pipelines utils or connecting pieces from other DS and DE modules, still, full pipelines are recommended to be under the pipeline-scripts folder."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that subs a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend you creating ccode from the notebook instead \n", + "#of creating from src and running nbdev_update_lib. Still, if you want\n", + "#to proceeed, please create your new code bellow this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks_dev/MLE_01_serving.ipynb b/notebooks_dev/MLE_01_serving.ipynb new file mode 100644 index 0000000..2053928 --- /dev/null +++ b/notebooks_dev/MLE_01_serving.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define module in which `#export` tag will save the code in `src`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#default_exp mle__serving" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import modules that are only used in documentation, nbdev related stuff like testing using assert and more generally inside this notebook (not going to src)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.showdoc import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2 #autoreload to make code from other modules get updated online inside notebook\n", + "\n", + "import sys\n", + "sys.path.append('..') #appends project root to path in order to import project packages since `notebooks_dev` is not on the root\n", + "\n", + "#DO NOT EDIT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "\n", + "#Internal Imports\n", + "#imports that are going to be used only during development and are not intended to be loaded inside the generated modules.\n", + "#for example: use imported modules to generate graphs for documentation, but lib is unused in actual package\n", + "\n", + "#import ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Machine Learning Engineering Serving\n", + "> Module intended to be used by the Machine Learning Engineering team for building serving functions, API helping functions, DB writing queries and everything related to serving models... Still, the final pipelines are recommended to be under pipeline-scripts folder on root."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev comments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TODOs - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- [X] TODO: do something\n", + "- [ ] TODO: do something else" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notebook History - THIS IS TEMPLATE CONTENT -" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "THIS IS TEMPLATE CONTENT\n", + "- 16/02 - developed feature A as requested by business team\n", + "- 17/02 - couldn't quite understand specific business rule, request explanation from business team\n", + "- 21/02 - business rule is now clearly explained, foo should be ran before bar and not otherwise" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code session" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### External imports\n", + "> imports that are intended to be loaded in the actual modules (going to src) e.g.: module dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "\n", + "#import ..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < func > THIS IS TEMPLATE CONTENT- " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def func(a,b): \n", + " '''\n", + " a function that sums a and b\n", + " '''\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`func` comments and usage examples for documentation:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "func(1,2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### < Class > THIS IS TEMPLATE CONTENT-- " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "class Class:\n", + " '''\n", + " a class to apply a function through the apply method\n", + " '''\n", + " def __init__(self, func):\n", + " assert callable(func), 'func should be callable type'\n", + " self.func = func\n", + " \n", + " def apply(self, *args, **kwargs):\n", + " return self.func(*args, **kwargs)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Class` comments and usage examples for documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cls_instance = Class(sum)\n", + "\n", + "cls_instance.apply([1,2,3,4])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Code created from src\n", + "> Session containing code generated in src (.py files) and converted back to notebook using nbdev_update_lib command" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "##############\n", + "#we highly recommend creating code from the notebook instead \n", + "#of creating it in src and running nbdev_update_lib. Still, if you want\n", + "#to proceed, please create your new code below this tag before running nbdev_update_lib\n", + "##############" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Experiment session\n", + "> Session to run the code and test functions and classes generated in this notebook. Helpful for documentation and experimental development.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tests\n", + "> Session to write tests in the nb-dev fashion (using assert)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test_1(a,b):\n", + " '''\n", + " tests if func is returning a sum\n", + " '''\n", + " return func(a,b) == a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Passed!\n" + ] + } + ], + "source": [ + "assert test_1(1,2)\n", + "print('Passed!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "from nbdev.export import notebook2script\n", + "notebook2script()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "latex_envs": { + "LaTeX_envs_menu_present": true, + "autoclose": false, + "autocomplete": true, + "bibliofile": "biblio.bib", + "cite_by": "apalike", + "current_citInitial": 1, + "eqLabelWithNumbers": true, + "eqNumInitial": 1, + "hotkeys": { + "equation": "Ctrl-E", + "itemize": "Ctrl-I" + }, + "labels_anchors": false, + "latex_user_defs": false, + "report_style_numbering": false, + "user_envs_cfg": false + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "250.319px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pipeline-scripts/Setup.py b/pipeline-scripts/Setup.py new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/settings.ini b/settings.ini new file mode 100644 index 0000000..19aac76 --- /dev/null +++ b/settings.ini @@ -0,0 +1,77 @@ +[DEFAULT] +#### All sections below are required unless otherwise specified +host = github +lib_name = src +repo_name = {repo_name} +user = {user} +author = {author} +author_email = {author_email} +description = {description} +copyright = {copyright} +branch = {branch} +version = 0.0.1 +min_python = 3.6 +audience = Developers +language = English +#### Anything shown as '%(...)s' is substituted with that setting automatically +doc_host = https://%(user)s.github.io +doc_baseurl = /%(repo_name)s/%(lib_name)s/ +lib_path = %(lib_name)s +title = %(repo_name)s +#For Enterprise Git pages use: +#doc_host = https://pages.github.%(company_name)s.com. 
+git_url = https://github.com/%(user)s/%(repo_name)s/tree/%(branch)s/ + + +#### Add licenses and see current list in `setup.py` +license = apache2 +#### From 1-7: Planning Pre-Alpha Alpha Beta Production Mature Inactive +status = 2 +#### Optional. Same format as setuptools requirements +# requirements = +#### Optional. Same format as setuptools console_scripts +# console_scripts = +#### Optional. Same format as setuptools dependency-links +# dep_links = + + +#### Change to, e.g. "nbs", to put your notebooks in nbs dir instead of repo root +nbs_path = notebooks_dev +doc_path = docs + +#doc_baseurl = /%(lib_name)s/ +# For Enterprise Github pages docs use: +# doc_baseurl = /%(repo_name)s/%(lib_name)s/ + + +#### For Enterprise Git add variable repo_name and company name +# company_name = nike +# keywords = some keywords + +#### Set to True if you want to create a more fancy sidebar.json than the default +custom_sidebar = False + +#### +# You probably won't need to change anything under here, +# unless you have some special requirements +#### + +#### Whether to look for library notebooks recursively in the `nbs_path` dir +recursive = True + +#### Anything shown as '%(...)s' is substituted with that setting automatically +# For Enterprise Github use: +#git_url = https://github.%(company_name)s.com/%(repo_name)s/%(lib_name)s/tree/%(branch)s/ + + +#Optional advanced parameters +#Monospace docstrings: adds
 tags around the doc strings, preserving newlines/indentation.
+#monospace_docstrings = False
+#Test flags: introduce here the test flags you want to use separated by |
+#tst_flags = 
+#Custom sidebar: customize sidebar.json yourself for advanced sidebars (False/True)
+#custom_sidebar = 
+#Cell spacing: if you want cell blocks in code separated by more than one new line
+#cell_spacing = 
+#Custom jekyll styles: if you want more jekyll styles than tip/important/warning, set them here
+#jekyll_styles = note,warning,tip,important
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..de97dee
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,47 @@
+from pkg_resources import parse_version
+from configparser import ConfigParser
+import setuptools
+assert parse_version(setuptools.__version__)>=parse_version('36.2')
+
+# note: all settings are in settings.ini; edit there, not here
+config = ConfigParser(delimiters=['='])
+config.read('settings.ini')
+cfg = config['DEFAULT']
+
+cfg_keys = 'version description keywords author author_email'.split()
+expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
+for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
+setup_cfg = {o:cfg[o] for o in cfg_keys}
+
+licenses = {
+    'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
+}
+statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
+    '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
+py_versions = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8'.split()
+
+requirements = cfg.get('requirements','').split()
+lic = licenses[cfg['license']]
+min_python = cfg['min_python']
+
+setuptools.setup(
+    name = cfg['lib_name'],
+    license = lic[0],
+    classifiers = [
+        'Development Status :: ' + statuses[int(cfg['status'])],
+        'Intended Audience :: ' + cfg['audience'].title(),
+        'License :: ' + lic[1],
+        'Natural Language :: ' + cfg['language'].title(),
+    ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]],
+    url = cfg['git_url'],
+    packages = setuptools.find_packages(),
+    include_package_data = True,
+    install_requires = requirements,
+    dependency_links = cfg.get('dep_links','').split(),
+    python_requires  = '>=' + cfg['min_python'],
+    long_description = open('README.md').read(),
+    long_description_content_type = 'text/markdown',
+    zip_safe = False,
+    entry_points = { 'console_scripts': cfg.get('console_scripts','').split() },
+    **setup_cfg)
+
diff --git a/src/.gitkeep b/src/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..f102a9c
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.1"
diff --git a/src/_nbdev.py b/src/_nbdev.py
new file mode 100644
index 0000000..52203cc
--- /dev/null
+++ b/src/_nbdev.py
@@ -0,0 +1,24 @@
+# AUTOGENERATED BY NBDEV! DO NOT EDIT!
+
+__all__ = ["index", "modules", "custom_doc_links", "git_url"]
+
+index = {"func": "MLE_01_serving.ipynb",  # exported name -> notebook file name
+         "Class": "MLE_01_serving.ipynb"}  # NOTE(review): every src module exports these same two names, so only one notebook is listed per name
+
+modules = ["de__extract.py",
+           "de__transform.py",
+           "de__load.py",
+           "ds__load.py",
+           "ds__preprocess.py",
+           "ds__build_features.py",
+           "ds__modelling.py",
+           "ds__validate.py",
+           "ds__postprocess.py",
+           "mle__pipeline_utils.py",
+           "mle__serving.py"]
+
+doc_url = "https://{user}.github.io/{repo_name}/src/"  # NOTE(review): {user}/{repo_name} look like unfilled template placeholders - confirm
+
+git_url = "https://github.com/{user}/src/tree/{branch}/"  # NOTE(review): {user}/{branch} look like unfilled template placeholders - confirm
+
+def custom_doc_links(name): return None  # returns None for every name
diff --git a/src/de__extract.py b/src/de__extract.py
new file mode 100644
index 0000000..8555636
--- /dev/null
+++ b/src/de__extract.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DE_00_extract.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/de__load.py b/src/de__load.py
new file mode 100644
index 0000000..153b172
--- /dev/null
+++ b/src/de__load.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DE_02_load.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/de__transform.py b/src/de__transform.py
new file mode 100644
index 0000000..694e9be
--- /dev/null
+++ b/src/de__transform.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DE_01_transform.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__build_features.py b/src/ds__build_features.py
new file mode 100644
index 0000000..7d54b54
--- /dev/null
+++ b/src/ds__build_features.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_02_build_features.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__load.py b/src/ds__load.py
new file mode 100644
index 0000000..4c358b9
--- /dev/null
+++ b/src/ds__load.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_00_load.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__modelling.py b/src/ds__modelling.py
new file mode 100644
index 0000000..782ec69
--- /dev/null
+++ b/src/ds__modelling.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_03_modelling.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__postprocess.py b/src/ds__postprocess.py
new file mode 100644
index 0000000..1924d99
--- /dev/null
+++ b/src/ds__postprocess.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_05_postprocess.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__preprocess.py b/src/ds__preprocess.py
new file mode 100644
index 0000000..ebd5dd8
--- /dev/null
+++ b/src/ds__preprocess.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_01_preprocess.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/ds__validate.py b/src/ds__validate.py
new file mode 100644
index 0000000..7b6a283
--- /dev/null
+++ b/src/ds__validate.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/DS_04_validate.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/mle__pipeline_utils.py b/src/mle__pipeline_utils.py
new file mode 100644
index 0000000..24f2080
--- /dev/null
+++ b/src/mle__pipeline_utils.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/MLE_00_pipeline_utils.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/src/mle__serving.py b/src/mle__serving.py
new file mode 100644
index 0000000..5018042
--- /dev/null
+++ b/src/mle__serving.py
@@ -0,0 +1,34 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: notebooks_dev/MLE_01_serving.ipynb (unless otherwise specified).
+
+__all__ = ['func', 'Class']
+
+# Cell
+
+#import ...
+
+# Cell
+def func(a,b):
+    '''
+    a function that sums a and b
+    '''
+    return a + b
+
+# Cell
+class Class:
+    '''
+    a class that stores a callable and invokes it through the apply method
+    '''
+    def __init__(self, func):
+        assert callable(func), 'func should be callable type'  # rejects non-callables up front
+        self.func = func
+
+    def apply(self, *args, **kwargs):
+        return self.func(*args, **kwargs)  # forwards every argument and returns the callable's result
+
+
+# Cell
+##############
+#we highly recommend creating code from the notebook instead
+#of creating it in src and running nbdev_update_lib. Still, if you want
+#to proceed, please create your new code below this tag before running nbdev_update_lib
+##############
\ No newline at end of file
diff --git a/utf8encode.py b/utf8encode.py
new file mode 100644
index 0000000..bbf8161
--- /dev/null
+++ b/utf8encode.py
@@ -0,0 +1,33 @@
+import glob
+
+def utf8encode(file_path):
+	'''
+	encodes a file to utf-8
+	'''	
+	with open(file_path, 'r+', encoding='utf-8', errors = 'replace') as file:				
+		file.write(file.read())
+		file.truncate()
+	
+	print(f'Converted {file_path} utf-8 (unicode)')
+	return	
+
+def get_html_paths(roots):	
+	'''
+	gets path of html files recursively starting from root in roots(list of roots)
+	'''
+	assert isinstance(roots,(tuple,list,set))
+	all_files = []
+	for root in roots:
+	    files = glob.glob(f'{root}/**/*.html',
+	                      recursive=True)
+	    all_files += files if type(files) != str else [files]
+
+	return all_files
+
+if __name__ == '__main__':	
+	'''
+	converts .html files in /docs to utf-8 and unicode charset
+	'''
+	paths = get_html_paths(['docs'])
+	for path in paths:		
+		utf8encode(path)