Skip to content

Commit

Permalink
Adds notebook -- proving it works
Browse files Browse the repository at this point in the history
  • Loading branch information
skrawcz committed Jul 10, 2024
1 parent 2a53f32 commit 984d6cb
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 0 deletions.
175 changes: 175 additions & 0 deletions examples/materialization/using_types/notebook.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-06-25T00:00:13.662458Z",
"start_time": "2024-06-25T00:00:06.982077Z"
}
},
"source": "%load_ext hamilton.plugins.jupyter_magic\n",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/stefankrawczyk/.pyenv/versions/knowledge_retrieval-py39/lib/python3.9/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n",
" warnings.warn(\n"
]
}
],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-25T00:00:25.003646Z",
"start_time": "2024-06-25T00:00:24.322577Z"
}
},
"cell_type": "code",
"source": [
"%%cell_to_module simple_etl --display\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"\n",
"from hamilton.htypes import DataLoaderMetadata, DataSaverMetadata\n",
"\n",
"\n",
"def raw_data() -> tuple[pd.DataFrame, DataLoaderMetadata]:\n",
" data = datasets.load_digits()\n",
" df = pd.DataFrame(data.data, columns=[f\"feature_{i}\" for i in range(data.data.shape[1])])\n",
" return df, DataLoaderMetadata.from_dataframe(df)\n",
"\n",
"\n",
"def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n",
" return raw_data\n",
"\n",
"\n",
"def saved_data(transformed_data: pd.DataFrame, filepath: str) -> DataSaverMetadata:\n",
" transformed_data.to_csv(filepath)\n",
" return DataSaverMetadata.from_file_and_dataframe(filepath, transformed_data)\n"
],
"id": "efd6c1b2417bb9cf",
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n -->\n<!-- Pages: 1 -->\n<svg width=\"406pt\" height=\"341pt\"\n viewBox=\"0.00 0.00 405.55 341.30\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 337.3)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-337.3 401.55,-337.3 401.55,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-136.3 8,-325.3 116.1,-325.3 116.1,-136.3 8,-136.3\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-308\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- raw_data -->\n<g id=\"node1\" class=\"node\">\n<title>raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M94.98,-126.1C94.98,-126.1 29.13,-126.1 29.13,-126.1 23.13,-126.1 17.13,-120.1 17.13,-114.1 17.13,-114.1 17.13,-74.5 17.13,-74.5 17.13,-68.5 23.13,-62.5 29.13,-62.5 29.13,-62.5 94.98,-62.5 94.98,-62.5 100.98,-62.5 106.98,-68.5 106.98,-74.5 106.98,-74.5 106.98,-114.1 106.98,-114.1 106.98,-120.1 100.98,-126.1 94.98,-126.1\"/>\n<text text-anchor=\"start\" x=\"32.43\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n<text text-anchor=\"start\" x=\"27.93\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- transformed_data -->\n<g id=\"node2\" class=\"node\">\n<title>transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M261.45,-126.1C261.45,-126.1 149.1,-126.1 149.1,-126.1 143.1,-126.1 137.1,-120.1 137.1,-114.1 137.1,-114.1 137.1,-74.5 137.1,-74.5 137.1,-68.5 143.1,-62.5 149.1,-62.5 149.1,-62.5 
261.45,-62.5 261.45,-62.5 267.45,-62.5 273.45,-68.5 273.45,-74.5 273.45,-74.5 273.45,-114.1 273.45,-114.1 273.45,-120.1 267.45,-126.1 261.45,-126.1\"/>\n<text text-anchor=\"start\" x=\"147.9\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"171.15\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data&#45;&gt;transformed_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>raw_data&#45;&gt;transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M107.35,-94.3C113.21,-94.3 119.36,-94.3 125.59,-94.3\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"125.34,-97.8 135.34,-94.3 125.34,-90.8 125.34,-97.8\"/>\n</g>\n<!-- saved_data -->\n<g id=\"node3\" class=\"node\">\n<title>saved_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.55,-94.07C397.55,-98.46 376.24,-102.02 350,-102.02 323.76,-102.02 302.45,-98.46 302.45,-94.07 302.45,-94.07 302.45,-22.53 302.45,-22.53 302.45,-18.14 323.76,-14.58 350,-14.58 376.24,-14.58 397.55,-18.14 397.55,-22.53 397.55,-22.53 397.55,-94.07 397.55,-94.07\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M397.55,-94.07C397.55,-89.69 376.24,-86.12 350,-86.12 323.76,-86.12 302.45,-89.69 302.45,-94.07\"/>\n<text text-anchor=\"start\" x=\"313.25\" y=\"-67\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n<text text-anchor=\"start\" x=\"315.12\" y=\"-39\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data</text>\n</g>\n<!-- transformed_data&#45;&gt;saved_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>transformed_data&#45;&gt;saved_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M273.88,-77.25C279.67,-75.79 285.5,-74.32 291.21,-72.88\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"291.69,-76.37 300.53,-70.53 289.98,-69.58 291.69,-76.37\"/>\n</g>\n<!-- 
_saved_data_inputs -->\n<g id=\"node4\" class=\"node\">\n<title>_saved_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"252.33,-44.6 158.23,-44.6 158.23,0 252.33,0 252.33,-44.6\"/>\n<text text-anchor=\"start\" x=\"173.03\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n<text text-anchor=\"start\" x=\"222.53\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _saved_data_inputs&#45;&gt;saved_data -->\n<g id=\"edge3\" class=\"edge\">\n<title>_saved_data_inputs&#45;&gt;saved_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M252.61,-33.99C264.95,-37.1 278.43,-40.5 291.3,-43.75\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"290.39,-47.13 300.94,-46.18 292.1,-40.34 290.39,-47.13\"/>\n</g>\n<!-- input -->\n<g id=\"node5\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.05,-180.6 35.05,-180.6 35.05,-144 89.05,-144 89.05,-180.6\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-156.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node6\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.48,-235.6C84.48,-235.6 39.63,-235.6 39.63,-235.6 33.63,-235.6 27.63,-229.6 27.63,-223.6 27.63,-223.6 27.63,-211 27.63,-211 27.63,-205 33.63,-199 39.63,-199 39.63,-199 84.48,-199 84.48,-199 90.47,-199 96.48,-205 96.48,-211 96.48,-211 96.48,-223.6 96.48,-223.6 96.48,-229.6 90.48,-235.6 84.48,-235.6\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-211.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n<!-- materializer -->\n<g id=\"node7\" class=\"node\">\n<title>materializer</title>\n<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.1,-290.84C108.1,-292.87 87.46,-294.51 62.05,-294.51 36.64,-294.51 16,-292.87 16,-290.84 16,-290.84 16,-257.76 16,-257.76 
16,-255.73 36.64,-254.09 62.05,-254.09 87.46,-254.09 108.1,-255.73 108.1,-257.76 108.1,-257.76 108.1,-290.84 108.1,-290.84\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M108.1,-290.84C108.1,-288.81 87.46,-287.16 62.05,-287.16 36.64,-287.16 16,-288.81 16,-290.84\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-268.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x152a40760>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-25T00:00:37.889540Z",
"start_time": "2024-06-25T00:00:35.994131Z"
}
},
"cell_type": "code",
"source": [
"from hamilton_sdk import adapters\n",
"\n",
"from hamilton import driver\n",
"\n",
"tracker = adapters.HamiltonTracker(\n",
" project_id=7, # modify this as needed\n",
" username=\"[email protected]\",\n",
" dag_name=\"my_version_of_the_dag\",\n",
" tags={\"environment\": \"DEV\", \"team\": \"MY_TEAM\", \"version\": \"X\"},\n",
")\n",
"dr = driver.Builder().with_config({}).with_modules(simple_etl).with_adapters(tracker).build()\n",
"dr.display_all_functions()"
],
"id": "e9252f2a09228330",
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n -->\n<!-- Pages: 1 -->\n<svg width=\"406pt\" height=\"341pt\"\n viewBox=\"0.00 0.00 405.55 341.30\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 337.3)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-337.3 401.55,-337.3 401.55,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-136.3 8,-325.3 116.1,-325.3 116.1,-136.3 8,-136.3\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-308\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- raw_data -->\n<g id=\"node1\" class=\"node\">\n<title>raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M94.98,-126.1C94.98,-126.1 29.13,-126.1 29.13,-126.1 23.13,-126.1 17.13,-120.1 17.13,-114.1 17.13,-114.1 17.13,-74.5 17.13,-74.5 17.13,-68.5 23.13,-62.5 29.13,-62.5 29.13,-62.5 94.98,-62.5 94.98,-62.5 100.98,-62.5 106.98,-68.5 106.98,-74.5 106.98,-74.5 106.98,-114.1 106.98,-114.1 106.98,-120.1 100.98,-126.1 94.98,-126.1\"/>\n<text text-anchor=\"start\" x=\"32.43\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n<text text-anchor=\"start\" x=\"27.93\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- transformed_data -->\n<g id=\"node2\" class=\"node\">\n<title>transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M261.45,-126.1C261.45,-126.1 149.1,-126.1 149.1,-126.1 143.1,-126.1 137.1,-120.1 137.1,-114.1 137.1,-114.1 137.1,-74.5 137.1,-74.5 137.1,-68.5 143.1,-62.5 149.1,-62.5 149.1,-62.5 
261.45,-62.5 261.45,-62.5 267.45,-62.5 273.45,-68.5 273.45,-74.5 273.45,-74.5 273.45,-114.1 273.45,-114.1 273.45,-120.1 267.45,-126.1 261.45,-126.1\"/>\n<text text-anchor=\"start\" x=\"147.9\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"171.15\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data&#45;&gt;transformed_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>raw_data&#45;&gt;transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M107.35,-94.3C113.21,-94.3 119.36,-94.3 125.59,-94.3\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"125.34,-97.8 135.34,-94.3 125.34,-90.8 125.34,-97.8\"/>\n</g>\n<!-- saved_data -->\n<g id=\"node3\" class=\"node\">\n<title>saved_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.55,-94.07C397.55,-98.46 376.24,-102.02 350,-102.02 323.76,-102.02 302.45,-98.46 302.45,-94.07 302.45,-94.07 302.45,-22.53 302.45,-22.53 302.45,-18.14 323.76,-14.58 350,-14.58 376.24,-14.58 397.55,-18.14 397.55,-22.53 397.55,-22.53 397.55,-94.07 397.55,-94.07\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M397.55,-94.07C397.55,-89.69 376.24,-86.12 350,-86.12 323.76,-86.12 302.45,-89.69 302.45,-94.07\"/>\n<text text-anchor=\"start\" x=\"313.25\" y=\"-67\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n<text text-anchor=\"start\" x=\"315.12\" y=\"-39\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data</text>\n</g>\n<!-- transformed_data&#45;&gt;saved_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>transformed_data&#45;&gt;saved_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M273.88,-77.25C279.67,-75.79 285.5,-74.32 291.21,-72.88\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"291.69,-76.37 300.53,-70.53 289.98,-69.58 291.69,-76.37\"/>\n</g>\n<!-- 
_saved_data_inputs -->\n<g id=\"node4\" class=\"node\">\n<title>_saved_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"252.33,-44.6 158.23,-44.6 158.23,0 252.33,0 252.33,-44.6\"/>\n<text text-anchor=\"start\" x=\"173.03\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n<text text-anchor=\"start\" x=\"222.53\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _saved_data_inputs&#45;&gt;saved_data -->\n<g id=\"edge3\" class=\"edge\">\n<title>_saved_data_inputs&#45;&gt;saved_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M252.61,-33.99C264.95,-37.1 278.43,-40.5 291.3,-43.75\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"290.39,-47.13 300.94,-46.18 292.1,-40.34 290.39,-47.13\"/>\n</g>\n<!-- input -->\n<g id=\"node5\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.05,-180.6 35.05,-180.6 35.05,-144 89.05,-144 89.05,-180.6\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-156.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node6\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.48,-235.6C84.48,-235.6 39.63,-235.6 39.63,-235.6 33.63,-235.6 27.63,-229.6 27.63,-223.6 27.63,-223.6 27.63,-211 27.63,-211 27.63,-205 33.63,-199 39.63,-199 39.63,-199 84.48,-199 84.48,-199 90.47,-199 96.48,-205 96.48,-211 96.48,-211 96.48,-223.6 96.48,-223.6 96.48,-229.6 90.48,-235.6 84.48,-235.6\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-211.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n<!-- materializer -->\n<g id=\"node7\" class=\"node\">\n<title>materializer</title>\n<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.1,-290.84C108.1,-292.87 87.46,-294.51 62.05,-294.51 36.64,-294.51 16,-292.87 16,-290.84 16,-290.84 16,-257.76 16,-257.76 
16,-255.73 36.64,-254.09 62.05,-254.09 87.46,-254.09 108.1,-255.73 108.1,-257.76 108.1,-257.76 108.1,-290.84 108.1,-290.84\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M108.1,-290.84C108.1,-288.81 87.46,-287.16 62.05,-287.16 36.64,-287.16 16,-288.81 16,-290.84\"/>\n<text text-anchor=\"middle\" x=\"62.05\" y=\"-268.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x1530081f0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 3
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-25T00:00:53.746596Z",
"start_time": "2024-06-25T00:00:52.320439Z"
}
},
"cell_type": "code",
"source": "dr.execute([\"saved_data\"], inputs={\"filepath\": \"data.csv\"})",
"id": "86c0d0f7da9a472b",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Capturing execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/25\n",
"\n",
"\n",
"Captured execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/25\n",
"\n"
]
},
{
"data": {
"text/plain": [
"{'saved_data': <hamilton.htypes.DataSaverMetadata at 0x1528de280>}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 4
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Binary file modified examples/materialization/using_types/simple_etl.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 984d6cb

Please sign in to comment.