From 0e4b3efbd4908718e753a99fe09724596db2dc63 Mon Sep 17 00:00:00 2001 From: Merari Santana Date: Fri, 24 Jan 2025 21:51:00 -0800 Subject: [PATCH 1/4] qq_and_residuals_plot function documentation added to example.ipynb --- docs/example.ipynb | 411 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 316 insertions(+), 95 deletions(-) diff --git a/docs/example.ipynb b/docs/example.ipynb index 04394cf..ecd5d2e 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -1,97 +1,318 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example usage\n", - "\n", - "To use `linreg_ally` in a project:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import linreg_ally\n", - "\n", - "print(linreg_ally.__version__)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Paramveer - EDA" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Alex - VIF " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Cheng - model fitting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Merari - plot" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example usage\n", + "\n", + "To use `linreg_ally` in a project:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.1.0\n" + ] + } + ], + "source": [ + "import linreg_ally\n", + "\n", + "print(linreg_ally.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Paramveer - EDA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Alex - VIF " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cheng - model fitting" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model Summary\n", + "------------------------\n", + "Test r2: 0.785\n", + "Test neg_mean_squared_error: 345.987\n" + ] + } + ], + "source": [ + "# Merari - plot (using Cheng's function outputs)\n", + "from vega_datasets import data\n", + "from linreg_ally.models import run_linear_regression\n", + "\n", + "df = 
data.cars()\n", + "df = df[['Horsepower', 'Displacement']].dropna()\n", + "\n", + "# Define parameters for run_linear_regression\n", + "dataframe = df\n", + "target_column = \"Horsepower\"\n", + "numeric_feats = [\"Displacement\"] \n", + "categorical_feats = [] # No categorical features in this case\n", + "drop_feats = None # No columns to drop\n", + "random_state = 123\n", + "\n", + "model_results = run_linear_regression(\n", + " dataframe=dataframe,\n", + " target_column=target_column,\n", + " numeric_feats=numeric_feats,\n", + " categorical_feats=categorical_feats,\n", + " drop_feats=drop_feats,\n", + " random_state=random_state\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking Normality and Homoscedasticity of Residuals\n", + "\n", + "A linear regression model assumes that residuals are normally distributed and have constant variance (homoscedasticity). To check whether these assumptions are met, we use the `qq_and_residuals_plot` function. This function generates:\n", + "\n", + "1. A Quantile-Quantile (Q-Q) plot to assess the normality of residuals.\n", + "2. A Residuals vs. Fitted Values plot to check for homoscedasticity." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `qq_and_residuals_plot` function takes two parameters: `y_actual` and `y_predicted`. These values were extracted from the linear regression model we previously created." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# Unpack the returned values correctly\n", + "best_model, X_train, X_test, y_train, y_test, scores = model_results\n", + "\n", + "# y_actual is y_test (true labels)\n", + "y_actual = y_test\n", + "\n", + "# y_predicted is obtained by predicting on X_test\n", + "y_predicted = best_model.predict(X_test)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that `y_actual` and `y_predicted` have been extracted, let's pass these parameters to the `qq_and_residuals_plot` function." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.HConcatChart(...)" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from linreg_ally.plotting import qq_and_residuals_plot\n", + "\n", + "qq_and_residuals_plot(y_actual, y_predicted)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpreting the Q-Q Plot\n", + "\n", + "If the Q-Q plot shows a significant deviation from the red dashed line (which represents perfect normality), the residuals are not normally distributed. In our plot, a few points deviate from the line at the tails, suggesting potential skewness or outliers. However, since these deviations are minor, we can conclude that the residuals are approximately normal." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpreting the Residuals vs. Fitted Values Plot\n", + "\n", + "For the homoscedasticity assumption to hold, residuals should be randomly scattered around the red dashed line in the Residuals vs. Fitted Values plot. This would indicate that residual variance remains constant across all fitted values (homoscedasticity).\n", + "\n", + "However, in our case, residuals cluster at different fitted value ranges, suggesting that the variance is not constant (heteroscedasticity)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Implications of Assumption Violations\n", + "\n", + "If the normality assumption is violated:\n", + "Ordinary Least Squares (OLS) regression still produces best linear unbiased estimates (BLUE) as long as independence and homoscedasticity hold. However, hypothesis tests and confidence intervals may be misleading if residuals deviate significantly from normality.\n", + "\n", + "If the homoscedasticity assumption is violated:\n", + "You can still fit a linear regression model, but you should interpret results with caution. The estimated coefficients remain unbiased, but standard errors and p-values become unreliable, affecting statistical inference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "The `qq_and_residuals_plot` function is a valuable tool for assessing the normality and homoscedasticity assumptions in linear regression. If these assumptions are violated, you should consider corrective measures such as:\n", + "\n", + "- Transforming variables (e.g., logarithmic transformation),\n", + "- Using robust standard errors, or\n", + "- Exploring alternative models (e.g., weighted least squares, generalized least squares)." 
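"\n",
"As a rough sketch of the first option, the example below log-transforms the target and re-runs `run_linear_regression` on the same two columns used earlier. It is a minimal illustration only (the variable names are hypothetical, and it reuses exactly the arguments shown above):\n",
"\n",
"```python\n",
"import numpy as np\n",
"from vega_datasets import data\n",
"from linreg_ally.models import run_linear_regression\n",
"\n",
"# Reload the two columns used earlier and drop rows with missing values\n",
"cars = data.cars()[['Horsepower', 'Displacement']].dropna()\n",
"\n",
"# Log-transform the target; this often helps stabilise residual variance\n",
"cars['Log_Horsepower'] = np.log(cars['Horsepower'])\n",
"\n",
"log_model_results = run_linear_regression(\n",
"    dataframe=cars.drop(columns=['Horsepower']),\n",
"    target_column='Log_Horsepower',\n",
"    numeric_feats=['Displacement'],\n",
"    categorical_feats=[],\n",
"    drop_feats=None,\n",
"    random_state=123\n",
")\n",
"```\n",
"\n",
"After refitting on the transformed scale, `qq_and_residuals_plot` can be rerun on the new predictions to check whether the residuals look closer to normal and show a more constant spread. If a transformation is not appropriate, robust standard errors or weighted least squares from a library such as `statsmodels` are common alternatives."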
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } From fd0a46404c5223cf7ab14c1b503eb0c71cc0c6d2 Mon Sep 17 00:00:00 2001 From: Merari Santana Date: Fri, 24 Jan 2025 21:55:18 -0800 Subject: [PATCH 2/4] added comment to delete my first code chunk if Cheng uses the same arguments for his run_linear_regression function --- docs/example.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/example.ipynb b/docs/example.ipynb index ecd5d2e..f830e37 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -88,6 +88,7 @@ } ], "source": [ + "# DELETE THIS CODE CHUNK IF CHENG USES THE SAME ARGUMENTS FOR HIS RUN_LINEAR_REGRESSION FUNCTION\n", "# Merari - plot (using Cheng's function outputs)\n", "from vega_datasets import data\n", "from linreg_ally.models import run_linear_regression\n", From eddb01dbfdcda9da1091f6b20d51b75e7499a95c Mon Sep 17 00:00:00 2001 From: Merari Santana Date: Sat, 25 Jan 2025 00:02:57 -0800 Subject: [PATCH 3/4] modified my code and explanations to fit with Chengs outputs. Plots should show up once integrated with Chengs code blocks --- docs/example.ipynb | 141 +++++---------------------------------------- 1 file changed, 15 insertions(+), 126 deletions(-) diff --git a/docs/example.ipynb b/docs/example.ipynb index f830e37..96da9c3 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -71,49 +71,6 @@ "# Cheng - model fitting" ] }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model Summary\n", - "------------------------\n", - "Test r2: 0.785\n", - "Test neg_mean_squared_error: 345.987\n" - ] - } - ], - "source": [ - "# DELETE THIS CODE CHUNK IF CHENG USES THE SAME ARGUMENTS FOR HIS RUN_LINEAR_REGRESSION FUNCTION\n", - "# Merari - plot (using Cheng's function outputs)\n", - "from vega_datasets import data\n", - "from linreg_ally.models import run_linear_regression\n", - "\n", - "df = data.cars()\n", - "df = df[['Horsepower', 'Displacement']].dropna()\n", - "\n", - "# Define parameters for run_linear_regression\n", - "dataframe = df\n", - "target_column = \"Horsepower\"\n", - "numeric_feats = [\"Displacement\"] \n", - "categorical_feats = [] # No categorical features in this case\n", - "drop_feats = None # No columns to drop\n", - "random_state = 123\n", - "\n", - "model_results = run_linear_regression(\n", - " dataframe=dataframe,\n", - " target_column=target_column,\n", - " numeric_feats=numeric_feats,\n", - " categorical_feats=categorical_feats,\n", - " drop_feats=drop_feats,\n", - " random_state=random_state\n", - ")\n" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -139,14 +96,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Unpack the returned values correctly\n", - "best_model, X_train, X_test, y_train, y_test, scores = model_results\n", + "#Using Cheng's outputs! 
Tested my `qq_and_residuals_plot` function on his branch\n", "\n", "# y_actual is y_test (true labels)\n", "y_actual = y_test\n", "\n", "# y_predicted is obtained by predicting on X_test\n", - "y_predicted = best_model.predict(X_test)\n" + "y_predicted = best_model.predict(X_test)" ] }, { @@ -158,91 +114,24 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 2, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.HConcatChart(...)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'y_actual' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlinreg_ally\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mplotting\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m qq_and_residuals_plot\n\u001b[0;32m----> 3\u001b[0m qq_and_residuals_plot(\u001b[43my_actual\u001b[49m, y_predicted)\n", + "\u001b[0;31mNameError\u001b[0m: name 'y_actual' is not defined" + ] } ], "source": [ + "#move this import to the top \n", + "\n", "from linreg_ally.plotting import qq_and_residuals_plot\n", "\n", "qq_and_residuals_plot(y_actual, y_predicted)" @@ -265,7 +154,7 @@ "\n", "For the homoscedasticity assumption to hold, residuals should be randomly scattered around the red dashed line in the Residuals vs. Fitted Values plot. This would indicate that residual variance remains constant across all fitted values (homoscedasticity).\n", "\n", - "However, in our case, residuals cluster at different fitted value ranges, suggesting that the variance is not constant (heteroscedasticity)." + "However, in our case, the residuals cluster at different fitted value ranges, and the spread increases as the fitted values increase, suggesting that the variance is not constant (heteroscedasticity)." ] }, { From f5c1ce8916aacd90f7258244fee498fd5e6dd242 Mon Sep 17 00:00:00 2001 From: Merari Santana Date: Sat, 25 Jan 2025 18:00:04 -0800 Subject: [PATCH 4/4] merged Chengs changes to my example.ipynb. His outputs are my inputs. --- docs/example.ipynb | 855 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 835 insertions(+), 20 deletions(-) diff --git a/docs/example.ipynb b/docs/example.ipynb index 96da9c3..7e1b572 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -64,13 +64,763 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Cheng - model fitting" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running Linear Regression Tutorial\n", + "\n", + "In this tutorial, you will learn a streamlined way to preprocess data, run linear regression and output with scoring metrics.\n", + "\n", + "First, ensure you have the `models` package imported." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from linreg_ally.models import run_linear_regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be using the `cars` dataset provided by `vega_datasets`. This dataset contains various features related to cars, including both numerical and categorical variables, making it ideal for demonstrating the full capabilities of our linear regression function." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[HTML table rendering omitted; the text/plain preview below shows the same first five rows]
" ], "text/plain": [ "                        Name  Miles_per_Gallon  Cylinders  Displacement  \\\n", "0  chevrolet chevelle malibu              18.0          8         307.0   \n", "1          buick skylark 320              15.0          8         350.0   \n", "2         plymouth satellite              18.0          8         318.0   \n", "3              amc rebel sst              16.0          8         304.0   \n", "4                ford torino              17.0          8         302.0   \n", "\n", "   Horsepower  Weight_in_lbs  Acceleration       Year Origin  \n", "0       130.0           3504          12.0 1970-01-01    USA  \n", "1       165.0           3693          11.5 1970-01-01    USA  \n", "2       150.0           3436          11.0 1970-01-01    USA  \n", "3       150.0           3433          12.0 1970-01-01    USA  \n", "4       140.0           3449          10.5 1970-01-01    USA  " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from vega_datasets import data\n", "\n", "df = data.cars()\n", "df.head()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "As shown above, the dataset includes data about different car models, featuring attributes such as `Miles_per_Gallon`, `Cylinders`, and `Displacement`. We will utilize these attributes to build a linear regression model, predicting the target variable `Horsepower`.\n", "\n", "We will first perform some data cleaning by dropping rows that contain `NA` values." ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df = df.dropna()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "With the dataset loaded, you're all set to move forward to the next step: using our package's `run_linear_regression` function to prepare the data, fit a model, and evaluate its performance.\n", "\n", "We will specify the `target_column`, `numeric_feats`, `categorical_feats` and `drop_feats`. In this case, `target_column` will be `Horsepower` since we are trying to predict its value. `numeric_feats` will be all the numeric features that we want to scale using scikit-learn's `StandardScaler`. `categorical_feats` will be the categorical features (in this case only `Origin`) that we want to one-hot encode using scikit-learn's `OneHotEncoder`. `drop_feats` will be the columns that we do not want to include in the analysis, which in this case is `Name` since it does not provide any meaningful information for the analysis.\n", "\n", "For the `scoring_metrics`, we will specify `r2` to evaluate the performance of the model on test data."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model Summary\n", + "------------------------\n", + "Test r2: 0.846\n" + ] + } + ], + "source": [ + "from vega_datasets import data\n", + "from linreg_ally.models import run_linear_regression\n", + "\n", + "df = data.cars()\n", + "df = df.dropna()\n", + "\n", + "# Define parameters for run_linear_regression\n", + "target_column = \"Horsepower\"\n", + "numeric_feats = [\"Miles_per_Gallon\", \"Cylinders\", \"Displacement\", \"Weight_in_lbs\", \"Acceleration\"] \n", + "categorical_feats = [\"Origin\"]\n", + "drop_feats = [\"Name\"]\n", + "random_state = 123\n", + "scoring_metrics = [\"r2\"]\n", + "\n", + "best_model, X_train, X_test, y_train, y_test, scores = run_linear_regression(\n", + " dataframe=df,\n", + " target_column=target_column,\n", + " numeric_feats=numeric_feats,\n", + " categorical_feats=categorical_feats,\n", + " drop_feats=drop_feats,\n", + " random_state=random_state,\n", + " scoring_metrics=scoring_metrics\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`best_model` provides a visual summary of the steps used in the entire linear regression pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[scikit-learn HTML pipeline diagram omitted; the text/plain representation below shows the same pipeline]
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocessor',\n", + " ColumnTransformer(transformers=[('standardscaler',\n", + " StandardScaler(),\n", + " ['Miles_per_Gallon',\n", + " 'Cylinders', 'Displacement',\n", + " 'Weight_in_lbs',\n", + " 'Acceleration']),\n", + " ('onehotencoder',\n", + " OneHotEncoder(), ['Origin']),\n", + " ('drop', 'drop', ['Name'])])),\n", + " ('model', LinearRegression())])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "best_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scores give the R² and negative mean squared error scores that we are interested in finding out in order to understand how the model performs on the test data." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'r2': 0.8463952369304465}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As shown above, an R² score of 85% indicates that 85% of the variance in the dependent variable can be explained by the independent variables included in the model, showing that the model provides a good fit to the data.\n", + "\n", + "However, R² alone does not tell the whole story, for example if there might be multicollinearity or other issues. You might also want to consider other metrics like Mean Squared Error (MSE), Root Mean Squared Error (RMSE), or visually inspect residual plots to gain a more comprehensive understanding of model performance." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the end of this tutorial where you have seen how we use the `run_linear_regression` function in our package to preprocess data, run linear regression and output with scoring metrics." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -92,12 +842,10 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "#Using Cheng's outputs! Tested my `qq_and_residuals_plot` function on his branch\n", - "\n", "# y_actual is y_test (true labels)\n", "y_actual = y_test\n", "\n", @@ -114,24 +862,91 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'y_actual' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlinreg_ally\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mplotting\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m qq_and_residuals_plot\n\u001b[0;32m----> 3\u001b[0m qq_and_residuals_plot(\u001b[43my_actual\u001b[49m, y_predicted)\n", - "\u001b[0;31mNameError\u001b[0m: name 'y_actual' is not defined" - ] + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.HConcatChart(...)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "#move this import to the top \n", - "\n", "from linreg_ally.plotting import qq_and_residuals_plot\n", "\n", "qq_and_residuals_plot(y_actual, y_predicted)"