From 186c67959d1d2b6fa64428ebc7036a6adb02bf61 Mon Sep 17 00:00:00 2001 From: John Pugliesi Date: Thu, 23 Apr 2020 14:18:47 -0700 Subject: [PATCH] Merge 2.1.1 --- .bumpversion.cfg | 2 +- CHANGELOG.md | 2 +- CONTRIBUTING.md | 24 +++++---- README.md | 2 +- RELEASING.md | 2 +- docs/changelog.md | 7 +++ papermill/cli.py | 43 ++++++++------- papermill/clientwrap.py | 2 +- papermill/exceptions.py | 3 +- papermill/execute.py | 44 ++++++++++++++-- papermill/tests/notebooks/broken1.ipynb | 43 +++++++++++++++ papermill/tests/test_adl.py | 2 +- papermill/tests/test_cli.py | 11 ++++ papermill/tests/test_execute.py | 70 ++++++++++++++++++++----- papermill/tests/test_hdfs.py | 8 ++- papermill/tests/test_iorw.py | 4 +- papermill/translators.py | 4 +- papermill/version.py | 2 +- pytest.ini | 3 ++ 19 files changed, 215 insertions(+), 63 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 06fdf197..c01d8a49 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.1.0 +current_version = 2.1.1 commit = True tag = True tag_name = {new_version} diff --git a/CHANGELOG.md b/CHANGELOG.md index c522db41..20b308f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,3 @@ # Change Log -See the [papermill documentation](https://papermill.readthedocs.io/changelog.html) +See the [papermill documentation](https://papermill.readthedocs.io/en/latest/changelog.html) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1774669..45a31ce8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,5 @@ # So You Want to Contribute to Papermill! + We welcome all contributions to Papermill both large and small. We encourage you to join our community. ## Our Community Values @@ -12,35 +13,38 @@ All contributions are equally important. Documentation, answering questions, and Please read our entire code of conduct [here](https://github.com/nteract/nteract/blob/master/CODE_OF_CONDUCT.md). Also, check out the for the [Python](https://github.com/nteract/nteract/blob/master/CODE_OF_CONDUCT.md) code of conduct. ## Setting up Your Development Environment + Following these instructions should give you an efficient path to opening your first pull-request. ### Cloning the Papermill Repository + Fork the repository to your local Github account. Clone this repository to your local development machine. + ```bash git clone https://github.com//papermill cd papermill ``` ### Install an Editable Version -We prefer to use [conda](https://conda.io/docs/user-guide/tasks/manage-environments.html) to manage the development environment. -```bash -conda create -n dev -. activate dev -``` -or use native venv capabilities if you prefer. + +We prefer to use native venv to manage the development environment. + ```bash python3 -m venv dev source dev/bin/activate ``` Install Papermill using: + ```bash pip install -e '.[dev]' ``` -If you're using pip 19 or above, you should run +or use conda if you prefer [conda](https://conda.io/docs/user-guide/tasks/manage-environments.html): + ```bash -pip install -e '.[dev]' --no-use-pep517 +conda create -n dev +. activate dev ``` _Note: When you are finished you can use `source deactivate` to go back to your base environment._ @@ -70,11 +74,9 @@ This will require python3.6, python3.8, and python3.7 to be installed. **Note** Alternavitely pytest can be used if you have an environment already setup which works or has custom packages not present in the tox build. ```bash -pytest --pyargs papermill +pytest ``` -The `pyargs` option allows `pytest` to interpret arguments as python package names. An advantage is that `pytest` will run in any directory, and this approach follows the `pytest` [best practices](https://docs.pytest.org/en/latest/goodpractices.html#tests-as-part-of-application-code). - Now there should be a working and editable installation of Papermill to start making your own contributions. ### Building Documentation diff --git a/README.md b/README.md index 89352d18..af23858b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![image](https://codecov.io/github/nteract/papermill/coverage.svg?branch=master)](https://codecov.io/github/nteract/papermill?branch=master) [![Documentation Status](https://readthedocs.org/projects/papermill/badge/?version=latest)](http://papermill.readthedocs.io/en/latest/?badge=latest) [![badge](https://tinyurl.com/ybwovtw2)](https://mybinder.org/v2/gh/nteract/papermill/master?filepath=binder%2Fprocess_highlight_dates.ipynb) -[![badge](https://tinyurl.com/y7uz2eh9)](https://mybinder.org/v2/gh/nteract/papermill/master? +[![badge](https://tinyurl.com/y7uz2eh9)](https://mybinder.org/v2/gh/nteract/papermill/master?) [![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/) [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) [![Python 3.8](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) diff --git a/RELEASING.md b/RELEASING.md index 9b23921c..fe5a987f 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -11,7 +11,7 @@ Change from patch to minor or major for appropriate version updates. ```bash bumpversion patch -git push && git push --tags +git push upstream && git push upstream --tags ``` ## Push to PyPI diff --git a/docs/changelog.md b/docs/changelog.md index fbc753e6..5ef6e7e4 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,12 @@ # Change Log +## 2.1.1 + +- DeadKernelExceptions, usually from OOM, now exit with a status code of 138 from the CLI. +- Error cell at the top of failed notebook has been made better. It now also has a link to an injected cell where the error occurred. +- Updated a deprecated function to the new function name for nbclient dependency. +- Some development and documentation updates / fixes have also been made by a few different contributions (thank you!). + ## 2.1.0 - Support for python 3.5 has been dropped. Upstream library changes for async were causing process deadlocks with await commands. End-of-life is later this year for 3.5 anyway so we decided to also drop support here. diff --git a/papermill/cli.py b/papermill/cli.py index 2ba95c7c..4747bef9 100755 --- a/papermill/cli.py +++ b/papermill/cli.py @@ -4,6 +4,8 @@ import os import sys from stat import S_ISFIFO +import nbclient +import traceback import base64 import logging @@ -216,24 +218,29 @@ def papermill( for name, value in parameters_raw or []: parameters_final[name] = value - execute_notebook( - input_path=notebook_path, - output_path=output_path, - parameters=parameters_final, - engine_name=engine, - request_save_on_cell_execute=request_save_on_cell_execute, - autosave_cell_every=autosave_cell_every, - prepare_only=prepare_only, - kernel_name=kernel, - progress_bar=progress_bar, - log_output=log_output, - stdout_file=stdout_file, - stderr_file=stderr_file, - start_timeout=start_timeout, - report_mode=report_mode, - cwd=cwd, - execution_timeout=execution_timeout, - ) + try: + execute_notebook( + input_path=notebook_path, + output_path=output_path, + parameters=parameters_final, + engine_name=engine, + request_save_on_cell_execute=request_save_on_cell_execute, + autosave_cell_every=autosave_cell_every, + prepare_only=prepare_only, + kernel_name=kernel, + progress_bar=progress_bar, + log_output=log_output, + stdout_file=stdout_file, + stderr_file=stderr_file, + start_timeout=start_timeout, + report_mode=report_mode, + cwd=cwd, + execution_timeout=execution_timeout, + ) + except nbclient.exceptions.DeadKernelError: + # Exiting with a special exit code for dead kernels + traceback.print_exc() + sys.exit(138) def _resolve_type(value): diff --git a/papermill/clientwrap.py b/papermill/clientwrap.py index 082bcdd0..18411094 100644 --- a/papermill/clientwrap.py +++ b/papermill/clientwrap.py @@ -36,7 +36,7 @@ def execute(self, **kwargs): with self.setup_kernel(**kwargs): self.log.info("Executing notebook with kernel: %s" % self.kernel_name) self.papermill_execute_cells() - info_msg = self._wait_for_reply(self.kc.kernel_info()) + info_msg = self.wait_for_reply(self.kc.kernel_info()) self.nb.metadata['language_info'] = info_msg['content']['language_info'] self.set_widgets_metadata() diff --git a/papermill/exceptions.py b/papermill/exceptions.py index 38a81687..ac3c49f1 100644 --- a/papermill/exceptions.py +++ b/papermill/exceptions.py @@ -22,7 +22,8 @@ class PapermillMissingParameterException(PapermillException): class PapermillExecutionError(PapermillException): """Raised when an exception is encountered in a notebook.""" - def __init__(self, exec_count, source, ename, evalue, traceback): + def __init__(self, cell_index, exec_count, source, ename, evalue, traceback): + self.cell_index = cell_index self.exec_count = exec_count self.source = source self.ename = ename diff --git a/papermill/execute.py b/papermill/execute.py index dd5853d8..1c14760d 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -83,6 +83,8 @@ def execute_notebook( nb = parameterize_notebook(nb, parameters, report_mode) nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode) + # clear out any existing error markers from previous papermill runs + nb = remove_error_markers(nb) if not prepare_only: # Fetch the kernel name if it's not supplied @@ -144,13 +146,35 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): return nb +ERROR_MARKER_TAG = "papermill-error-cell-tag" + +ERROR_STYLE = ( + 'style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;"' +) + ERROR_MESSAGE_TEMPLATE = ( - '' - "An Exception was encountered at 'In [%s]'." + '' + "An Exception was encountered at 'In [%s]'." + '' +) + +ERROR_ANCHOR_MSG = ( + '' + 'Execution using papermill encountered an exception here and stopped:' '' ) +def remove_error_markers(nb): + nb = copy.deepcopy(nb) + nb.cells = [ + cell + for cell in nb.cells + if ERROR_MARKER_TAG not in cell.metadata.get("tags", []) + ] + return nb + + def raise_for_execution_errors(nb, output_path): """Assigned parameters into the appropriate place in the input notebook @@ -162,7 +186,7 @@ def raise_for_execution_errors(nb, output_path): Path to write executed notebook """ error = None - for cell in nb.cells: + for index, cell in enumerate(nb.cells): if cell.get("outputs") is None: continue @@ -171,6 +195,7 @@ def raise_for_execution_errors(nb, output_path): if output.ename == "SystemExit" and (output.evalue == "" or output.evalue == "0"): continue error = PapermillExecutionError( + cell_index=index, exec_count=cell.execution_count, source=cell.source, ename=output.ename, @@ -180,9 +205,18 @@ def raise_for_execution_errors(nb, output_path): break if error: - # Write notebook back out with the Error Message at the top of the Notebook. + # Write notebook back out with the Error Message at the top of the Notebook, and a link to + # the relevant cell (by adding a note just before the failure with an HTML anchor) error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count) error_msg_cell = nbformat.v4.new_markdown_cell(error_msg) - nb.cells = [error_msg_cell] + nb.cells + error_msg_cell.metadata['tags'] = [ERROR_MARKER_TAG] + error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG) + error_anchor_cell.metadata['tags'] = [ERROR_MARKER_TAG] + + # put the anchor before the cell with the error, before all the indices change due to the + # heading-prepending + nb.cells.insert(error.cell_index, error_anchor_cell) + nb.cells.insert(0, error_msg_cell) + write_ipynb(nb, output_path) raise error diff --git a/papermill/tests/notebooks/broken1.ipynb b/papermill/tests/notebooks/broken1.ipynb index b24dcba6..5b8f5e94 100644 --- a/papermill/tests/notebooks/broken1.ipynb +++ b/papermill/tests/notebooks/broken1.ipynb @@ -1,5 +1,34 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "papermill-error-cell-tag" + ] + }, + "source": [ + "An Exception was encountered at 'In [1]'." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A markdown cell that makes the execution counts different to indices within the list of all cells." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "papermill-error-cell-tag" + ] + }, + "source": [ + "Execution encountered an exception here and stopped:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -41,6 +70,20 @@ "print(\"We're good.\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another markdown cell" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A third one." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/papermill/tests/test_adl.py b/papermill/tests/test_adl.py index 6baa8f25..6195c9f4 100644 --- a/papermill/tests/test_adl.py +++ b/papermill/tests/test_adl.py @@ -44,7 +44,7 @@ def test_listdir_calls_ls_on_adl_adapter(self): self.ls.assert_called_once_with("path/to/directory") def test_read_opens_and_reads_file(self): - self.assertEquals( + self.assertEqual( self.adl.read("adl://foo_store.azuredatalakestore.net/path/to/file"), ["a", "b", "c"] ) self.fakeFile.__iter__.assert_called_once_with() diff --git a/papermill/tests/test_cli.py b/papermill/tests/test_cli.py index ae92f39a..5760c250 100755 --- a/papermill/tests/test_cli.py +++ b/papermill/tests/test_cli.py @@ -6,6 +6,7 @@ import sys import subprocess import uuid +import nbclient import nbformat from jupyter_client import kernelspec @@ -178,6 +179,16 @@ def test_parameters_yaml_override(self, execute_patch): ) ) + @patch( + cli.__name__ + '.execute_notebook', side_effect=nbclient.exceptions.DeadKernelError("Fake") + ) + def test_parameters_dead_kernel(self, execute_patch): + result = self.runner.invoke( + papermill, + self.default_args + ['--parameters_yaml', '{"foo": "bar"}', '-y', '{"foo": ["baz"]}'], + ) + assert result.exit_code == 138 + @patch(cli.__name__ + '.execute_notebook') def test_parameters_base64(self, execute_patch): self.runner.invoke( diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index f7e290b1..1872ec38 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -144,16 +144,44 @@ def tearDown(self): def test(self): path = get_notebook_path('broken1.ipynb') + + # check that the notebook has two existing marker cells, so that this test is sure to be + # validating the removal logic (the markers are simulatin an error in the first code cell + # that has since been fixed) + original_nb = load_notebook_node(path) + self.assertEqual(original_nb.cells[0].metadata["tags"], ["papermill-error-cell-tag"]) + self.assertIn("In [1]", original_nb.cells[0].source) + self.assertEqual(original_nb.cells[2].metadata["tags"], ["papermill-error-cell-tag"]) + result_path = os.path.join(self.test_dir, 'broken1.ipynb') with self.assertRaises(PapermillExecutionError): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") - self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + self.assertRegex( + nb.cells[0].source, + r'^$' + ) + self.assertEqual(nb.cells[0].metadata["tags"], ["papermill-error-cell-tag"]) + + self.assertEqual(nb.cells[1].cell_type, "markdown") + self.assertEqual(nb.cells[2].execution_count, 1) + self.assertEqual(nb.cells[3].cell_type, "markdown") + self.assertEqual(nb.cells[4].cell_type, "markdown") + + self.assertEqual(nb.cells[5].cell_type, "markdown") + self.assertRegex(nb.cells[5].source, '') + self.assertEqual(nb.cells[5].metadata["tags"], ["papermill-error-cell-tag"]) + self.assertEqual(nb.cells[6].execution_count, 2) + self.assertEqual(nb.cells[6].outputs[0].output_type, 'error') + + self.assertEqual(nb.cells[7].execution_count, None) + + # double check the removal (the new cells above should be the only two tagged ones) + self.assertEqual( + sum("papermill-error-cell-tag" in cell.metadata.get("tags", []) for cell in nb.cells), + 2 + ) class TestBrokenNotebook2(unittest.TestCase): @@ -170,12 +198,19 @@ def test(self): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") + self.assertRegex( + nb.cells[0].source, + r'^.*In \[2\].*$' + ) self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'display_data') - self.assertEqual(nb.cells[2].outputs[1].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + + self.assertEqual(nb.cells[2].cell_type, "markdown") + self.assertRegex(nb.cells[2].source, '') + self.assertEqual(nb.cells[3].execution_count, 2) + self.assertEqual(nb.cells[3].outputs[0].output_type, 'display_data') + self.assertEqual(nb.cells[3].outputs[1].output_type, 'error') + + self.assertEqual(nb.cells[4].execution_count, None) class TestReportMode(unittest.TestCase): @@ -284,8 +319,15 @@ def test_sys_exit1(self): execute_notebook(get_notebook_path(notebook_name), result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") + self.assertRegex( + nb.cells[0].source, + r'^$' + ) self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + + self.assertEqual(nb.cells[2].cell_type, "markdown") + self.assertRegex(nb.cells[2].source, '') + self.assertEqual(nb.cells[3].execution_count, 2) + self.assertEqual(nb.cells[3].outputs[0].output_type, 'error') + + self.assertEqual(nb.cells[4].execution_count, None) diff --git a/papermill/tests/test_hdfs.py b/papermill/tests/test_hdfs.py index 504e887e..74b81a84 100644 --- a/papermill/tests/test_hdfs.py +++ b/papermill/tests/test_hdfs.py @@ -37,13 +37,17 @@ def setUp(self): def test_hdfs_listdir(self, mock_hdfs_filesystem): client = self.hdfs_handler._get_client() - self.assertEqual(self.hdfs_handler.listdir("hdfs:///Projects/"), ['test1.ipynb', 'test2.ipynb']) + self.assertEqual( + self.hdfs_handler.listdir("hdfs:///Projects/"), ['test1.ipynb', 'test2.ipynb'] + ) # Check if client is the same after calling self.assertIs(client, self.hdfs_handler._get_client()) def test_hdfs_read(self, mock_hdfs_filesystem): client = self.hdfs_handler._get_client() - self.assertEqual(self.hdfs_handler.read("hdfs:///Projects/test1.ipynb"), b'Content of notebook') + self.assertEqual( + self.hdfs_handler.read("hdfs:///Projects/test1.ipynb"), b'Content of notebook' + ) self.assertIs(client, self.hdfs_handler._get_client()) def test_hdfs_write(self, mock_hdfs_filesystem): diff --git a/papermill/tests/test_iorw.py b/papermill/tests/test_iorw.py index 41fd138d..be858f84 100644 --- a/papermill/tests/test_iorw.py +++ b/papermill/tests/test_iorw.py @@ -153,9 +153,7 @@ def test_read_yaml_with_invalid_file_extension(self): def test_read_stdin(self): file_content = u'Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ' with patch('sys.stdin', io.StringIO(file_content)): - self.assertEqual( - self.papermill_io.read("-"), file_content - ) + self.assertEqual(self.papermill_io.read("-"), file_content) def test_listdir(self): self.assertEqual(self.papermill_io.listdir("fake/path"), ["fake", "contents"]) diff --git a/papermill/translators.py b/papermill/translators.py index b71a7806..d19f5215 100644 --- a/papermill/translators.py +++ b/papermill/translators.py @@ -27,8 +27,8 @@ def find_translator(self, kernel_name, language): class Translator(object): @classmethod - def translate_raw_str(csl, val): - """Reusable by most interpreters.""" + def translate_raw_str(cls, val): + """Reusable by most interpreters""" return '{}'.format(val) @classmethod diff --git a/papermill/version.py b/papermill/version.py index dc58ef8a..c9e52937 100644 --- a/papermill/version.py +++ b/papermill/version.py @@ -1 +1 @@ -version = '2.1.0' +version = '2.1.1' diff --git a/pytest.ini b/pytest.ini index 0996ab88..bb30eeb1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,3 +2,6 @@ env = AWS_SECRET_ACCESS_KEY=foobar_secret AWS_ACCESS_KEY_ID=foobar_key +filterwarnings = + ignore:.*imp module is deprecated.*:DeprecationWarning +