From 165c92b331162f4d6f6a4de5cb453bbeac4c9cc4 Mon Sep 17 00:00:00 2001
From: Winston <winston@Winstons-MacBook-Pro.local>
Date: Thu, 15 Aug 2019 11:06:13 -0700
Subject: [PATCH 1/7] initial commit for WIP pr & issue notebook addition;
 currently seeing parse_dates arg error on first read_csv; new issue as of
 yesterday; otherwise notebook was/should be running fine as labeled/noted

---
 .../zillow_kaggle_zestimate_comp.ipynb        | 3046 +++++++++++++++++
 1 file changed, 3046 insertions(+)
 create mode 100644 colab_notebooks/zillow_kaggle_zestimate_comp.ipynb

diff --git a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
new file mode 100644
index 00000000..24a1849f
--- /dev/null
+++ b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
@@ -0,0 +1,3046 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "zillow_kaggle_zestimate_comp.ipynb",
+      "version": "0.3.2",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "scfLT2i0MLyD",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Environment Sanity Check #\n",
+        "\n",
+        "Click the _Runtime_ dropdown at the top of the page, then _Change Runtime Type_ and confirm the instance type is _GPU_.\n",
+        "\n",
+        "Check the output of `!nvidia-smi` to make sure you've been allocated a Tesla T4.\n",
+        "\n",
+        "# Setup:\n",
+        "\n",
+        "1. Install most recent Miniconda release compatible with Google Colab's Python install  (3.6.7)\n",
+        "2. Install RAPIDS libraries\n",
+        "3. Set necessary environment variables\n",
+        "4. Copy RAPIDS .so files into current working directory, a workaround for conda/colab interactions\n",
+        "- **TLDR**\n",
+        "  - Hit `Shift` + `Enter`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "W-um5d-x7o46",
+        "colab_type": "code",
+        "outputId": "37bf77fb-7f83-49fc-b5e5-514cd049e32d",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 329
+        }
+      },
+      "source": [
+        "\"\"\"make sure we have the right GPU\n",
+        "> column 1 row 3 == Tesla T4\n",
+        "\"\"\"\n",
+        "# display gpu specs\n",
+        "!nvidia-smi"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Thu Aug 15 03:12:33 2019       \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| NVIDIA-SMI 418.67       Driver Version: 410.79       CUDA Version: 10.0     |\n",
+            "|-------------------------------+----------------------+----------------------+\n",
+            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
+            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
+            "|===============================+======================+======================|\n",
+            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
+            "| N/A   60C    P8    16W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
+            "+-------------------------------+----------------------+----------------------+\n",
+            "                                                                               \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| Processes:                                                       GPU Memory |\n",
+            "|  GPU       PID   Type   Process name                             Usage      |\n",
+            "|=============================================================================|\n",
+            "|  No running processes found                                                 |\n",
+            "+-----------------------------------------------------------------------------+\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kkEdr1VmigyU",
+        "colab_type": "text"
+      },
+      "source": [
+        "### Install RAPIDS AI"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "p129YxxnihcV",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "!wget -nc https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
+        "!bash rapids-colab.sh\n",
+        "\n",
+        "import sys, os\n",
+        "# make the freshly installed site-packages importable and point numba at the CUDA NVVM libraries\n",
+        "sys.path.append('/usr/local/lib/python3.6/site-packages/')\n",
+        "os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'\n",
+        "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "1CsdVW7SU9Li",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Zillow Kaggle Competition RAPIDS Conversion\n",
+        "- initially based off eswar3's [Zillow prediction models](https://github.com/eswar3/Zillow-prediction-models) repo\n",
+        "## Download Data\n",
+        "- to download the data, please plug in your kaggle api username & key\n",
+        "  - you can set up your kaggle api at `https://www.kaggle.com/YOUR USERNAME HERE/account`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "x1dLRTm168Tk",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Info on how to get your api key (kaggle.json) here: https://github.com/Kaggle/kaggle-api#api-credentials\n",
+        "!pip install kaggle\n",
+        "!mkdir -p /root/.kaggle\n",
+        "# plug api -- replace both placeholders with your own Kaggle username and API key (never commit real credentials)\n",
+        "!echo '{\"username\":\"YOUR_KAGGLE_USERNAME\",\"key\":\"YOUR_KAGGLE_API_KEY\"}' > /root/.kaggle/kaggle.json\n",
+        "!chmod 600 /root/.kaggle/kaggle.json\n",
+        "# !kaggle datasets download\n",
+        "!kaggle competitions download -c zillow-prize-1\n",
+        "\n",
+        "# unzip kaggle data (-o: overwrite without prompting so the cell can be re-run)\n",
+        "!unzip -q -o \"/content/sample_submission.csv.zip\"\n",
+        "!unzip -q -o \"/content/train_2016_v2.csv.zip\"\n",
+        "!unzip -q -o \"/content/properties_2016.csv.zip\"\n",
+        "!unzip -q -o \"/content/train_2017.csv.zip\"\n",
+        "!unzip -q -o \"/content/properties_2017.csv.zip\""
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "LICr9uz8do9K",
+        "colab_type": "text"
+      },
+      "source": [
+        "#### How is the data saved?\n",
+        "- inside content directory "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "6n75DyJ-dm4B",
+        "colab_type": "code",
+        "outputId": "fbd949ae-aa45-4c67-c6e2-74553239623e",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 182
+        }
+      },
+      "source": [
+        "# display content folder contents\n",
+        "!ls \"/content/\""
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0.9\t\t\t\t  sample_data\n",
+            "env-check.py\t\t\t  sample_submission.csv\n",
+            "__MACOSX\t\t\t  sample_submission.csv.zip\n",
+            "Miniconda3-4.5.4-Linux-x86_64.sh  train_2016_v2.csv\n",
+            "properties_2016.csv\t\t  train_2016_v2.csv.zip\n",
+            "properties_2016.csv.zip\t\t  train_2017.csv\n",
+            "properties_2017.csv\t\t  train_2017.csv.zip\n",
+            "properties_2017.csv.zip\t\t  zillow_data_dictionary.xlsx.zip\n",
+            "rapids-colab.sh\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Lpa1b4edIXuT",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Imports\n",
+        "### RAPIDS\n",
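+        "* `cuDF`\n",
+        "  - GPU DataFrame library with a pandas-like API\n",
+        "* `cuML`\n",
+        "  - GPU-accelerated machine learning library with a scikit-learn-like API\n",
+        "* `CuPy`\n",
+        "  - NumPy-compatible array library for the GPU\n",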
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "_Tvf2biLAA9r",
+        "colab": {}
+      },
+      "source": [
+        "# rapids imports\n",
+        "import cudf, cuml, cupy\n",
+        "# general imports \n",
+        "import io, requests  "
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YJeywzd2efw7",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Data\n",
+        "* `properties_2016`\n",
+        "  - approx. 27,000,000 residential properties \n",
+        "  - 58 attributes each\n",
+        "* `train_2016_v2`\n",
+        "  - 90,000 transaction records for closings in the year 2016\n",
+        "    * Merge datasets on `parcelid`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uynoUxpx8Xsn",
+        "colab_type": "code",
+        "outputId": "545d3b69-741a-4f23-86df-62ec7f19fb7d",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 227
+        }
+      },
+      "source": [
+        "# import train 2016  data\n",
+        "train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n",
+        "                          parse_dates=[\"transactiondate\"])\n",
+        "# peek display 2016 train\n",
+        "print(train2016.head())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "TypeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-6-ff87c45bf2d8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n\u001b[0;32m----> 2\u001b[0;31m                           parse_dates=[\"transactiondate\"])\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;31m# peek display 2016 train\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain2016\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mTypeError\u001b[0m: read_csv() got an unexpected keyword argument 'parse_dates'"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2EfApIzCfEtr",
+        "colab_type": "code",
+        "outputId": "eabb1351-f4f9-499c-9aea-2fa2953c11a7",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 146
+        }
+      },
+      "source": [
+        "# import 2016 properties\n",
+        "prop2016 = cudf.read_csv('/content/properties_2016.csv')\n",
+        "# peek display 2016 properties\n",
+        "print(prop2016.head())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "   parcelid  airconditioningtypeid  architecturalstyletypeid  basementsqft  bathroomcnt  bedroomcnt  buildingclasstypeid ...  censustractandblock\n",
+            "0  10754147                                                                         0.0         0.0                      ...                     \n",
+            "1  10759547                                                                         0.0         0.0                      ...                     \n",
+            "2  10843547                                                                         0.0         0.0                      ...                     \n",
+            "3  10859147                                                                         0.0         0.0                  3.0 ...                     \n",
+            "4  10879947                                                                         0.0         0.0                  4.0 ...                     \n",
+            "[50 more columns]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gGiscxESJDrl",
+        "colab_type": "text"
+      },
+      "source": [
+        "## [Zillow Prediction Model](https://github.com/eswar3/Zillow-prediction-models/blob/master/Step%202a-Approach1.ipynb)\n",
+        "\n",
+        "    In this approach the properties data and transaction data are merged together before addressing any missing values\n",
+        "\n",
+        "\n",
+        "#### Merging Data \n",
+        " - we will start by merging the two dataframes\n",
+        "  - then rename the new dataframe's attributes to be meaningful \n",
+        "    - e.g. from `pooltypeid7` to `pool_with_spa_tub_no` and `structuretaxvaluedollarcnt` to `structure_tax`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "o4CvSIcwm4B2",
+        "colab_type": "code",
+        "outputId": "6db5ec53-8522-4483-e2fa-d79d9d9d75e8",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 146
+        }
+      },
+      "source": [
+        "# merge 2016 train and property dataframes by parcel id\n",
+        "train = train2016.merge(prop2016, how='left', on='parcelid')\n",
+        "\n",
+        "# work on a copy\n",
+        "df_train = train.copy()  # [:int(0.5*len(train))]\n",
+        "\n",
+        "# add column indicating month of transaction\n",
+        "df_train['transaction_month'] = df_train['transactiondate'].dt.month\n",
+        "\n",
+        "# set columns to be renamed for general english understandability \n",
+        "rename_these = {\"bathroomcnt\": \"total_bath\",\n",
+        "                \"fullbathcnt\": \"full_bath\",\n",
+        "                \"threequarterbathnbr\": \"half_bath\",\n",
+        "                \"yardbuildingsqft17\": \"patio_sqft\",\n",
+        "                \"yardbuildingsqft26\":\"storage_sqft\",\n",
+        "                \"decktypeid\": \"deck_flag\",\n",
+        "                \"pooltypeid7\": \"pool_with_spa_tub_no\", \n",
+        "                \"pooltypeid2\": \"pool_with_spa_tub_yes\",\n",
+        "                \"hashottuborspa\": \"has_hottub_or_spa\", \n",
+        "                \"pooltypeid10\": \"just_hottub_or_spa\",\n",
+        "                \"calculatedfinishedsquarefeet\":\"total_finished_living_area_sqft\", \n",
+        "                \"finishedsquarefeet12\": \"finished_living_area_sqft\",\n",
+        "                \"lotsizesquarefeet\": \"lot_area_sqft\",\n",
+        "                \"finishedsquarefeet50\":\"finished_living_area_entryfloor_sqft1\",\n",
+        "                \"finishedfloor1squarefeet\":\"finished_living_area_entryfloor_sqft2\",\n",
+        "                \"finishedsquarefeet6\": \"base_unfinished_and_finished_area_sqft\",\n",
+        "                \"finishedsquarefeet15\": \"total_area_sqft\",\n",
+        "                \"finishedsquarefeet13\": \"preimeter_living_area_sqft\",\n",
+        "                \"taxvaluedollarcnt\":\"total_parcel_tax\",\n",
+        "                \"landtaxvaluedollarcnt\":\"land_tax\",\n",
+        "                \"taxamount\":\"total_property_tax_2016\",\n",
+        "                \"structuretaxvaluedollarcnt\":\"structure_tax\",\n",
+        "                \"garagetotalsqft\":\"garage_sqft\",\n",
+        "                \"fireplacecnt\":\"fireplace_count\",\n",
+        "                \"buildingqualitytypeid\":\"building_quality_id\",\n",
+        "                \"heatingorsystemtypeid\":\"heating_system_id\",\n",
+        "                \"airconditioningtypeid\":\"ac_id\",\n",
+        "                \"storytypeid\": \"basement_flag\",\n",
+        "                \"basementsqft\": \"basement_sqft\",\n",
+        "                \"poolsizesum\": \"pool_sqft\",\n",
+        "                \"poolcnt\": \"pool_count\"}\n",
+        "# rename columns (keys must match the raw column names exactly; unmatched keys are silently skipped)\n",
+        "df_train = df_train.rename(columns = rename_these)\n",
+        "\n",
+        "# what's the data frame look like?\n",
+        "print(df_train.head())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "   parcelid             logerror         transactiondate  ac_id  architecturalstyletypeid  basement_sqft  total_bath ...  transaction_month\n",
+            "0  11827818               0.0402 2016-03-15T00:00:00.000                                                         4.0 ...                  3\n",
+            "1  12123024               0.0296 2016-03-15T00:00:00.000                                                         3.0 ...                  3\n",
+            "2  13867327               0.0344 2016-03-15T00:00:00.000                                                         2.0 ...                  3\n",
+            "3  12681894                0.006 2016-03-15T00:00:00.000                                                         3.0 ...                  3\n",
+            "4  12848541  0.06949999999999999 2016-03-15T00:00:00.000    1.0                                                  4.0 ...                  3\n",
+            "[53 more columns]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YdtyBI2jFnJv",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Conforming Attribute Values\n",
+        "### #0 boolean columns & null = 0s cases \n",
+        "* `pool_count`, `pool_with_spa_tub_no` and `pool_with_spa_tub_yes` are all binary variables, replace all NULL values with zero\n",
+        "*   `basement_flag` has values 7 & `Null` but is supposed to be bool, convert the `7`s to `1`s and the `Null`s to `0`s \n",
+        "* patio and shed variables with null values are assumed to have none\n",
+        "* deck_flag has only 2 values, `66` and `null`\n",
+        "  - convert it into binary flag\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "z3bPdNONHTYI",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# replace missing pool count values so we booling\n",
+        "the_bool_club = ['pool_count','pool_with_spa_tub_no','pool_with_spa_tub_yes',\n",
+        "                 'basement_flag','patio_sqft','storage_sqft', 'deck_flag']\n",
+        "for col in the_bool_club:\n",
+        "  # convert null values to 0\n",
+        "  df_train[col]=df_train[col].fillna(0)\n",
+        "# convert 7s and 66s to 1s\n",
+        "df_train['basement_flag'] = df_train['basement_flag'].replace(7, 1)\n",
+        "df_train['deck_flag'] = df_train['deck_flag'].replace(66, 1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5MbGy6r7JLLD",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #1 The pool\n",
+        "*   When pool is present and if it has tub/spa then `just_hottub_or_spa` = 0"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "B3-1V93smA9A",
+        "colab_type": "code",
+        "outputId": "66d7335e-bc42-4108-a1c1-80f1afb06a4b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 380
+        }
+      },
+      "source": [
+        "# when poolcnt=1 and has_hottub_or_spa=1 and just_hottub_or_spa is null, then just_hottub_or_spa =0\n",
+        "conditions = ((df_train['pool_count'] == 1) \n",
+        "              & (df_train['has_hottub_or_spa'] == 1) \n",
+        "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
+        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(0, conditions) "
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "TypeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-27-10369f477b6c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m conditions = ((df_train['pool_count'] == 1) \n\u001b[1;32m      2\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m               & (df_train['just_hottub_or_spa'].isna() == True))\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'just_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'just_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconditions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    811\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    812\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__eq__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 813\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'eq'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    814\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    815\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mequals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m_unordered_compare\u001b[0;34m(self, other, cmpops)\u001b[0m\n\u001b[1;32m    781\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmpops\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    782\u001b[0m         \u001b[0mnvtx_range_push\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CUDF_UNORDERED_COMP\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"orange\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 783\u001b[0;31m         \u001b[0mother\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_normalize_binop_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    784\u001b[0m         \u001b[0moutcol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmpops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    785\u001b[0m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_copy_construct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m_normalize_binop_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    777\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    778\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 779\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormalize_binop_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    781\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmpops\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mnormalize_binop_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    703\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    704\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 705\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cannot broadcast {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    706\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    707\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdefault_na_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mTypeError\u001b[0m: cannot broadcast <class 'int'>"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "v6E3-_XlSGBs",
+        "colab_type": "text"
+      },
+      "source": [
+        "- when `has_hottub_or_spa` is null and `just_hottub_or_spa` is null\n",
+        "  - both should be zero\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Xa12WFccSGM6",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# if both has hottub and just hottub are null\n",
+        "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
+        "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
+        "# just hottub or spa = 0 \n",
+        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(0, conditions) \n",
+        "\n",
+        "# now, if has hottub is null and just hottub is 0 \n",
+        "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
+        "              & (df_train['just_hottub_or_spa'] == 0))\n",
+        "# has hottub or spa = 0 \n",
+        "df_train['has_hottub_or_spa'] = df_train['has_hottub_or_spa'].masked_assign(0, conditions) "
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5umCCWN73qxw",
+        "colab_type": "text"
+      },
+      "source": [
+        "- when there is no pool\n",
+        "  - if there is tub/spa \n",
+        "    - then `just_hottub_or_spa`  = 1"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FBgs7zJm3qk-",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# when poolcnt=0, has_hottub_or_spa=1\n",
+        "conditions = ((df_train['pool_count'] == 0) \n",
+        "              & (df_train['has_hottub_or_spa'] == 1))\n",
+        "# just_hottub_or_spa=1\n",
+        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(1, conditions) \n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3LsRr1aoSCVx",
+        "colab_type": "text"
+      },
+      "source": [
+        "*   When there is no pool, set pool size to zero instead of na"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "NtdyXCbx0TKx",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# where there is no pool\n",
+        "conditions = df_train['pool_count']==0\n",
+        "# square footage of non existant pool is 0 \n",
+        "df_train['pool_sqft'] = df_train['pool_sqft'].masked_assign(0, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3hQFkXmAgQPY",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #2 The basement\n",
+        "*    Where `basement_flag` is zero, `basement_sqft` should also be zero\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kMuCOqAmLTmY",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# where there is no basement\n",
+        "conditions = df_train['basement_flag'] == 0\n",
+        "# fun fact: we just did this with the pool\n",
+        "df_train['basement_sqft'] = df_train['basement_sqft'].masked_assign(0, conditions) "
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wU6Uohb-PDYB",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #3 The fireplace\n",
+        "There seems to be inconsistency between the `fireplace_flag` and `fireplace_count`\n",
+        "- 90,053 flag values are null\n",
+        "- 80,688 `fireplace_count` values are null\n",
+        "    * 9,385 (-11.5%) difference, but a boatload either way"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "OZM6lXmmpj5k",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
+        "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "v9ZAzFoIpkSF",
+        "colab_type": "text"
+      },
+      "source": [
+        "* context driven solutions\n",
+        "  * where neither flag nor count exists, `fireplaceflag == False`\n",
+        "  *   when `fireplace_count` is more than zero `fireplaceflag` should be `True`\n",
+        "  * if `fireplaceflag == False`, the `fireplace_count` is logically `0`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "i3YRZgU_qZhA",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# fireplace_count is not written until the last step, so its null mask can be reused\n",
+        "count_missing = df_train['fireplace_count'].isna()\n",
+        "\n",
+        "# neither a flag nor a count recorded: flag the row False\n",
+        "conditions = count_missing & df_train['fireplaceflag'].isna()\n",
+        "df_train['fireplaceflag'] = df_train['fireplaceflag'].masked_assign(False, conditions)\n",
+        "\n",
+        "# a positive fireplace count means the flag must be True\n",
+        "conditions = df_train['fireplace_count'] > 0\n",
+        "df_train['fireplaceflag'] = df_train['fireplaceflag'].masked_assign(True, conditions)\n",
+        "\n",
+        "# a False flag with a missing count: the count is 0\n",
+        "conditions = count_missing & (df_train['fireplaceflag']==False)\n",
+        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(0, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "pYntUejosOn3"
+      },
+      "source": [
+        "### #4 The garage\n",
+        "*   Properties with no garages would have NA values for both `garagecarcnt` and `garage_sqft`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "L9mGs-mK9E0Q",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "garage = ['garagecarcnt', 'garage_sqft']\n",
+        "# rows where neither the car count nor the square footage was recorded\n",
+        "conditions = df_train['garagecarcnt'].isna() & df_train['garage_sqft'].isna()\n",
+        "# no garage data at all is read as no garage: set both columns to 0 on those rows\n",
+        "for column in garage:\n",
+        "  df_train[column] = df_train[column].masked_assign(0, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "0uV115W6-ohW",
+        "colab_type": "text"
+      },
+      "source": [
+        "Exploring the data further, we see\n",
+        "- `garage_sqft` holds over 8,900 measurements of 0 despite the garage's car count being 1 or more  \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "gbbUIbwJ-ouS",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# rows with a positive garage car count but a garage square footage of 0\n",
+        "conditions = (df_train.garagecarcnt > 0) & (df_train.garage_sqft == 0)\n",
+        "# print only the two garage columns (names come from the `garage` list set earlier)\n",
+        "print(df_train.loc[conditions][garage])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5I1O76QKA8Cb",
+        "colab_type": "text"
+      },
+      "source": [
+        "- these 0 values need to be null\n",
+        " - because no garage holding 1 or more cars in 2016 measured 0sqft"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "eWVtoty0A9Jt",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# rows with a positive garage car count but a garage square footage of 0\n",
+        "conditions = (df_train.garagecarcnt>0) & (df_train.garage_sqft==0)\n",
+        "# pass NaN (cupy.nan) and the mask to masked_assign so those zeros read as missing\n",
+        "df_train['garage_sqft'] = df_train['garage_sqft'].masked_assign(cupy.nan, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "seb6r5wx5Bbz"
+      },
+      "source": [
+        "### #5 The bath\n",
+        "*   `total_bath` & `calculatedbathnbr` are near-duplicates w/ `calculated` having more nulls\n",
+        "  - let's drop it\n",
+        "*   if `full_bath` is null and `half_bath` is also null\n",
+        "  - let's make `total_bath` = 0 \n",
+        "      - because we can't truthfully assume it's any more "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab_type": "code",
+        "id": "EgMNToed5BMu",
+        "colab": {}
+      },
+      "source": [
+        "# calculatedbathnbr nearly duplicates total_bath, so remove it\n",
+        "df_train = df_train.drop('calculatedbathnbr', axis=1)\n",
+        "\n",
+        "# neither full_bath nor half_bath recorded: total_bath becomes 0\n",
+        "conditions = df_train['full_bath'].isnull() & df_train['half_bath'].isnull()\n",
+        "df_train['total_bath'] = df_train['total_bath'].masked_assign(0, conditions)\n",
+        "\n",
+        "# every bath is a full bath (full_bath equals total_bath): half_bath becomes 0\n",
+        "conditions = df_train['full_bath'] == df_train['total_bath']\n",
+        "df_train['half_bath'] = df_train['half_bath'].masked_assign(0, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Sh8cG0pr4_hl"
+      },
+      "source": [
+        "### #6 Mode Imputation \n",
+        "* scaling down the latitude and longitude\n",
+        "  - knn imputation takes more time due to the larger numbers\n",
+        "  - standardizing gives better results on most algorithms\n",
+        "    - this is a competition, we came to win"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kitrNxKgLWUd",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# scale both coordinates down with a vectorized division on the column itself;\n",
+        "# looping over a cuDF Series element by element in Python is slow (or unsupported,\n",
+        "# depending on the cuDF version) and breaks on null entries\n",
+        "df_train['latitude'] = df_train['latitude'] / 100000\n",
+        "df_train['longitude'] = df_train['longitude'] / 100000"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "y6bhRhu5YZ1d",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #7 numberofstories & unitcnt & roomcnt\n",
+        "* we can devise unit count based on property land type\n",
+        "  - so we can now go ahead and correct the unit counts for each given property"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "yHZH4rMNLfBA",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# where room count is 0, replace it with NaN\n",
+        "conditions = df_train['roomcnt'] == 0\n",
+        "df_train['roomcnt'] = df_train['roomcnt'].masked_assign(cupy.nan, conditions)\n",
+        "\n",
+        "# propertylandusetypeid & unitcnt are related\n",
+        "#   these are the propertylandusetypeid codes & their definitions\n",
+        "#\n",
+        "#   246 - Duplex (2 Units, Any Combination)\n",
+        "#   247 - Triplex (3 Units, Any Combination)\n",
+        "#   248 - Quadruplex (4 Units, Any Combination)\n",
+        "#   260 - Residential General\n",
+        "#   261 - Single Family Residential\n",
+        "#   263 - Mobile Home\n",
+        "#   264 - Townhouse\n",
+        "#   266 - Condominium\n",
+        "#   267 - Cooperative\n",
+        "#   269 - Planned Unit Development\n",
+        "#   275 - Residential Common Area\n",
+        "#   31  - Commercial/Office/Residential Mixed Used\n",
+        "#   47  - Store/Office (Mixed Use)\n",
+        "#   265 - Cluster Home\n",
+        "\n",
+        "# unit count filled in for each land use type (265 is not handled in this cell)\n",
+        "implied_units = [(1, [260,261,263,264,266,267,269,275]),\n",
+        "                 (2, [31,47,246]),\n",
+        "                 (3, [247]),\n",
+        "                 (4, [248])]\n",
+        "for unit_count, landuse_ids in implied_units:\n",
+        "  for landuse_id in landuse_ids:\n",
+        "    # only rows of this land use type whose unit count is still null\n",
+        "    conditions = ((df_train['propertylandusetypeid'] == landuse_id) \n",
+        "                  & (df_train['unitcnt'].isnull()))\n",
+        "    df_train['unitcnt'] = df_train['unitcnt'].masked_assign(unit_count, conditions)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "02yLicmxLs3C",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #8 Time to Cut\n",
+        "**Because of the adjustments made so far a number of columns are no longer needed**\n",
+        "*  transaction date column is no longer of use\n",
+        "  - and can be dropped \n",
+        "* `preimeter_living_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+        "  - except that `preimeter_living_area_sqft` has more duplicates\n",
+        "* `total_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+        "  - except that \"total_area_sqft\" has more duplicates\n",
+        "* `total_finished_living_area_sqft` and `finished_living_area_sqft` have the same values \n",
+        "  - except that `finished_living_area_sqft` has more duplicates\n",
+        "* `base_unfinished_and_finished_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+        "  - except that `base_unfinished_and_finished_area_sqft` has more duplicates\n",
+        "* different counties follow different land use code\n",
+        "  - to compare different counties, Zillow has created its own `propertylandusetypeid`\n",
+        "    - hence we can drop `propertycountylandusecode`\n",
+        "    - the same applies to `propertyzoningdesc`\n",
+        "* Most zip ids are either invalid or out of city\n",
+        "  - since enough information about location is given in latitude and longitude \n",
+        "    - let's drop other location related fields\n",
+        "      - `regionidcity`\n",
+        "      - `regionidzip`\n",
+        "      - `regionidneighborhood`\n",
+        "* `assessmentyear` has a constant value for all rows\n",
+        "  - let's drop it"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "OtOgzOqHLyid",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# collect columns to drop\n",
+        "# NOTE(review): 'censustractandblock' is dropped here too, although the notes above\n",
+        "# this cell do not mention it -- confirm this is intended\n",
+        "cut = ['propertyzoningdesc','propertycountylandusecode',\n",
+        "       'base_unfinished_and_finished_area_sqft','finished_living_area_sqft',\n",
+        "       'total_area_sqft','preimeter_living_area_sqft','regionidzip',\n",
+        "       'regionidcity','regionidneighborhood','assessmentyear','transactiondate',\n",
+        "       'censustractandblock']\n",
+        "# cut columns from dataframe (df_train is rebound to the result of drop)\n",
+        "df_train = df_train.drop(cut, axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "icDvpvSD6BSb",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #9 Tax, Year, & Census\n",
+        "-  if tax delinquency flag is null, assume there is no unpaid tax on the property\n",
+        "  - an issue arises here because `taxdelinquencyflag` is a `StringColumn`\n",
+        "    - i.e. null values indicate no tax delinquency, all other values are `Y` for yes\n",
+        "    - because of this, the normal method of.."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "8lYcO_T5XKNN",
+        "colab_type": "code",
+        "outputId": "0b77457e-0eed-4e21-be79-1df380432abc",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 303
+        }
+      },
+      "source": [
+        "# how we'd normally take care of this -- on this string column it raises a TypeError,\n",
+        "# so catch it and show the message instead of halting a Restart & Run All\n",
+        "try:\n",
+        "  df_train['taxdelinquencyflag'].fillna(0)\n",
+        "except TypeError as err:\n",
+        "  print(f'TypeError: {err}')"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "TypeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-24-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1135\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1137\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1138\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1139\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    709\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    710\u001b[0m         ):\n\u001b[0;32m--> 711\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    713\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tA6xG6h59rLi",
+        "colab_type": "text"
+      },
+      "source": [
+        "- ...comes with error. \n",
+        "  - Why?\n",
+        "    - the series we are trying to fill the null values of is a string series\n",
+        "      - because of this `.fillna()` requires a string value (e.g. '0') instead of an int value (e.g. 0)\n",
+        "  - So, what now?\n",
+        "    - there is an easy and straightforward solution with masked assigning!! \n",
+        "      - First\n",
+        "        - switch 1 (current True, actual False) to -1\n",
+        "      - Then\n",
+        "        - switch 0 (current False, actual True) to 1 to reflect True status\n",
+        "      - Finally\n",
+        "        - switch -1 (old True, actual False) to 0 to reflect False status"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Svp6J0cJ5dL0",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# .isna() replaces the 'Y'/null string column with a null mask, so at this point\n",
+        "# a true value marks a NULL flag -- the inverse of the meaning we want\n",
+        "df_train['taxdelinquencyflag'] = df_train['taxdelinquencyflag'].isna()\n",
+        "\n",
+        "# next we must correct the values, with 1 (True) for 'Y' and 0 for no\n",
+        "# -1 is a temporary stand-in: 1 -> -1 first, so the 0 -> 1 step cannot be confused\n",
+        "# with the original 1s, then -1 -> 0 finishes the inversion\n",
+        "switcharoo = [(1,-1),(0,1),(-1,0)]\n",
+        "# switch values in order\n",
+        "for pair in switcharoo:\n",
+        "  # tag old value and new value it will be replaced with\n",
+        "  old, new = pair\n",
+        "  # replace old value with new value\n",
+        "  df_train['taxdelinquencyflag'] = df_train['taxdelinquencyflag'].replace(old, \n",
+        "                                                                          new)\n",
+        "# display values in tax delinquency flag column\n",
+        "print(df_train['taxdelinquencyflag'].value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "w5EAdWXaCTRU",
+        "colab_type": "text"
+      },
+      "source": [
+        "- Convert years\n",
+        "  - from yy\n",
+        "    - to 2016 - yyyy \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "6Bic66I9LfGC",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# set year pairs: (two-digit year, years elapsed by 2016) -- e.g. 14 -> 2016 - 2014\n",
+        "year_pairs = [(99,2016-1999),(6,2016-2006),(7,2016-2007),(8,2016-2008),\n",
+        "              (9,2016-2009),(10,2016-2010),(11,2016-2011),(12,2016-2012),\n",
+        "              (13,2016-2013),(14,2016-2014),(15,2016-2015)]\n",
+        "# split the pairs into parallel lists of old and new values\n",
+        "old_years = [old for old, new in year_pairs]\n",
+        "new_years = [new for old, new in year_pairs]\n",
+        "# replace every value in ONE call: going pair by pair lets an earlier output collide\n",
+        "# with a later input (6 -> 10, then 10 -> 6), which undoes the first replacement and\n",
+        "# leaves 2006/2010 (and 2007/2009) indistinguishable\n",
+        "df_train['taxdelinquencyyear'] = df_train['taxdelinquencyyear'].replace(old_years, \n",
+        "                                                                          new_years)\n",
+        "# what're we lookin at?\n",
+        "print(df_train['taxdelinquencyyear'].value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ya7xLHzdGVcs",
+        "colab_type": "text"
+      },
+      "source": [
+        "- values in `rawcensustractandblock` represent multiple fields concatenated together as float values\n",
+        "  - by converting those values to string we can split each and build new columns:\n",
+        "    - `census_tractnumber`\n",
+        "    - `block_number`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "b3sh8aGovTLT",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(df_train['rawcensustractandblock'].head())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AJrFMIuvvqUr",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# string form of every raw census value, taken straight from the dataframe column\n",
+        "# (values_to_string() is indexed and item-assigned like a list in the original cell)\n",
+        "raw_census = df_train['rawcensustractandblock'].values_to_string()\n",
+        "\n",
+        "# tract number: characters 4 through 10 of each raw string\n",
+        "census_tractnumber = [raw[4:11] for raw in raw_census]\n",
+        "# set new tract number column\n",
+        "df_train['census_tractnumber'] = census_tractnumber\n",
+        "\n",
+        "def to_block_number(raw):\n",
+        "  \"\"\"Build the 4-character block number from the characters after position 11.\"\"\"\n",
+        "  digits = raw[11:]\n",
+        "  # first four characters form the whole part, the rest the fraction; round to an int\n",
+        "  rounded = int(round(float(digits[:4]+'.'+digits[4:]+'0'), 0))\n",
+        "  # pad on the right with zeros up to a width of 4\n",
+        "  return str(rounded).ljust(4,'0')\n",
+        "\n",
+        "# set/adjust block number\n",
+        "block_number = [to_block_number(raw) for raw in raw_census]\n",
+        "# add block number column to dataframe\n",
+        "df_train['block_number'] = block_number\n",
+        "\n",
+        "# rawcensustractandblock values have been converted\n",
+        "df_train = df_train.drop('rawcensustractandblock', axis=1)\n",
+        "# let's see what we've got\n",
+        "print(df_train[['census_tractnumber', 'block_number']].head(3))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T71orw51lpTN",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Dealing with Missing Values\n",
+        "### #1 Setting standards\n",
+        "- Despite correcting and adjusting the data to this point, there are still some columns holding a large majority of null values\n",
+        "- For some columns, this majority represents over 95% of values\n",
+        "  - Let's identify those columns\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xhCosNpXvTVU",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# null count per column divided by the row count, i.e. a fraction between 0 and 1;\n",
+        "# reset_index() turns the result into a two-column frame\n",
+        "missingvalues_prop = (df_train.isnull().sum()/len(df_train)).reset_index()\n",
+        "missingvalues_prop.columns = ['field','percentage']\n",
+        "\n",
+        "# sort by null fraction, from highest to lowest\n",
+        "missingvalues_prop = missingvalues_prop.sort_values(by='percentage', \n",
+        "                                                    ascending=False)\n",
+        "# identify columns whose null fraction exceeds 0.95 (more than 95% null)\n",
+        "missingvaluescols = missingvalues_prop.loc[missingvalues_prop['percentage'] > 0.95]\n",
+        "\n",
+        "# display columns with highest % null values\n",
+        "print(missingvaluescols)\n",
+        "\n",
+        "# drop columns with more than 95% null values (names come from the 'field' column)\n",
+        "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8eBIDWEUBHwz",
+        "colab_type": "text"
+      },
+      "source": [
+        "- and drop columns with more than 95% null values"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "az6t2ntBCMRe",
+        "colab_type": "text"
+      },
+      "source": [
+        "### #2 Working with Remaining Values\n",
+        "- the majority of values still missing in unitcnt are rows where `propertylandusetypeid` = 265, \n",
+        "  - which is Cluster Home (i.e. group of houses with shared walls)\n",
+        "    - each cluster is anywhere between 5 to 25 units\n",
+        "      - here we will assume 10 units as a reasonable count"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "yB2lzAyopS_S",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Cluster Home rows (propertylandusetypeid 265) whose unit count is still null\n",
+        "conditions = ((df_train['propertylandusetypeid'] == 265) \n",
+        "              & (df_train['unitcnt'].isnull()))\n",
+        "# assume 10 units, but only where unitcnt is missing so recorded counts are kept\n",
+        "# (same null guard as the other land-use based unit count fills)\n",
+        "df_train['unitcnt'] = df_train['unitcnt'].masked_assign(10, conditions)\n",
+        "# let's see what we've got\n",
+        "print(df_train['unitcnt'].value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ofZIC0EdKJ0Y",
+        "colab_type": "text"
+      },
+      "source": [
+        "# -----current: test ready-----"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "c8Zfn-YhlSBO",
+        "colab_type": "code",
+        "outputId": "2087fa66-8683-4040-a3e1-7654942367b7",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "# mean pool_sqft over rows with pool_count > 0; rows selected this way can still\n",
+        "# carry a pool_sqft of 0, and those zeros are part of this mean\n",
+        "poolsizesum_mean = df_train.loc[df_train['pool_count'] > 0].pool_sqft.mean()\n",
+        "\"\"\"\n",
+        "NEEDS TO BE CONFIRMED WITH OG\n",
+        "> is this supposed to only consider if pool_sqft > 0 as well?\n",
+        "\"\"\"\n",
+        "poolsizesum_mean"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "28.13881906038769"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 86
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "cA30ozCWo5x3",
+        "colab_type": "code",
+        "outputId": "fda7011f-6bee-4b60-e137-ec04d05e440b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 118
+        }
+      },
+      "source": [
+        "print(df_train.loc[df_train['pool_count'] > 0].pool_sqft.head())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "5    0.0\n",
+            "8    0.0\n",
+            "11    0.0\n",
+            "13    0.0\n",
+            "23    0.0\n",
+            "Name: pool_sqft, dtype: float64\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-icFDeLSoJwl",
+        "colab_type": "code",
+        "outputId": "9c5035bd-b766-4509-c5a8-f3a475093dd4",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 907
+        }
+      },
+      "source": [
+        "# state before imputation: value counts and null counts, pool rows vs all rows\n",
+        "print(df_train.loc[df_train.pool_count>0].pool_sqft.value_counts())\n",
+        "print(df_train.pool_sqft.value_counts())\n",
+        "print(df_train.loc[df_train.pool_count>0].pool_sqft.isna().sum())\n",
+        "print(df_train.pool_sqft.isna().sum())\n",
+        "\n",
+        "# average pool square footage over properties whose pool has a recorded size;\n",
+        "# the 0 entries are the very values being imputed, so they must stay out of the\n",
+        "# mean -- including them drags the fill value far below any recorded pool size\n",
+        "sized_pools = ((df_train['pool_count'] > 0) \n",
+        "               & (df_train['pool_sqft'] > 0))\n",
+        "new_value = df_train.loc[sized_pools, 'pool_sqft'].mean()\n",
+        "\n",
+        "# where the property has a pool(s) but pool square feet is 0\n",
+        "conditions = ((df_train['pool_count'] > 0) \n",
+        "              & (df_train['pool_sqft'] == 0))\n",
+        "\n",
+        "# set pool square feet to the average size of the pools that have a recorded size\n",
+        "df_train['pool_sqft'] = df_train['pool_sqft'].masked_assign(new_value, conditions)\n",
+        "\n",
+        "# state after imputation, same four views as before\n",
+        "print(df_train.loc[df_train.pool_count>0].pool_sqft.value_counts())\n",
+        "print(df_train.pool_sqft.value_counts())\n",
+        "print()\n",
+        "print(df_train.loc[df_train.pool_count>0].pool_sqft.isna().sum())\n",
+        "print(df_train.pool_sqft.isna().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0.0    16932\n",
+            "450.0      105\n",
+            "400.0       41\n",
+            "800.0       39\n",
+            "500.0       36\n",
+            "600.0       35\n",
+            "512.0       30\n",
+            "480.0       27\n",
+            "648.0       18\n",
+            "420.0       17\n",
+            "[264 more rows]\n",
+            "dtype: int64\n",
+            "0.0    89306\n",
+            "450.0      105\n",
+            "400.0       41\n",
+            "800.0       39\n",
+            "500.0       36\n",
+            "600.0       35\n",
+            "512.0       30\n",
+            "480.0       27\n",
+            "648.0       18\n",
+            "420.0       17\n",
+            "[264 more rows]\n",
+            "dtype: int64\n",
+            "0\n",
+            "0\n",
+            "28.13881906038769    16932\n",
+            "450.0      105\n",
+            "400.0       41\n",
+            "800.0       39\n",
+            "500.0       36\n",
+            "600.0       35\n",
+            "512.0       30\n",
+            "480.0       27\n",
+            "648.0       18\n",
+            "420.0       17\n",
+            "[264 more rows]\n",
+            "dtype: int64\n",
+            "0.0    72374\n",
+            "28.13881906038769    16932\n",
+            "450.0      105\n",
+            "400.0       41\n",
+            "800.0       39\n",
+            "500.0       36\n",
+            "600.0       35\n",
+            "512.0       30\n",
+            "480.0       27\n",
+            "648.0       18\n",
+            "[265 more rows]\n",
+            "dtype: int64\n",
+            "\n",
+            "0\n",
+            "0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3pVABkZTYK9F",
+        "colab_type": "code",
+        "outputId": "42a0b5cc-42e2-41c5-8fdd-11485c45c933",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 774
+        }
+      },
+      "source": [
+        "# Rebuild structure_tax from the other two tax columns.\n",
+        "# Expected relationship: total_parcel_tax = structure_tax + land_tax\n",
+        "#                     -> structure_tax    = total_parcel_tax - land_tax\n",
+        "\n",
+        "# rows where total tax and land tax are both greater than 0\n",
+        "hmm = df_train.loc[(df_train.total_parcel_tax>0) & (df_train.land_tax>0)]\n",
+        "print(f'{len(hmm)} rows where total and land are greater than 0')\n",
+        "print(f'{len(df_train)} total rows, hopefully the same as above number')\n",
+        "print()\n",
+        "# rows whose recorded structure tax differs from total - land\n",
+        "# (computed once and reused for both the count and the listing)\n",
+        "mismatched = hmm.loc[hmm.structure_tax!=hmm['total_parcel_tax']-hmm['land_tax']]\n",
+        "print(len(mismatched))\n",
+        "print()\n",
+        "print(mismatched)\n",
+        "print()\n",
+        "any_neg = hmm.loc[hmm.total_parcel_tax < hmm.land_tax]\n",
+        "# if this comes back as 0, setting all structures to total - land should work\n",
+        "print(f'{len(any_neg)} total taxes are less than same rows land tax\\n')\n",
+        "print(any_neg)\n",
+        "\n",
+        "# SWITCH TO RAPIDS\n",
+        "# current concern: are there places where total and land are not greater than 0\n",
+        "# and setting structure to their difference is not the best move\n",
+        "\n",
+        "# structure tax should be equal to total tax minus land tax wherever both inputs\n",
+        "# are positive; every other row keeps the structure_tax it already has.\n",
+        "# masked_assign hands its value straight to a GPU fill kernel, so it only works\n",
+        "# with a scalar: passing a Series raises\n",
+        "#   ValueError: cannot determine Numba type of <class 'cudf.dataframe.series.Series'>\n",
+        "# where() picks per row between two Series instead.\n",
+        "# NOTE(review): needs a cudf release that provides Series.where -- confirm against\n",
+        "# the RAPIDS version installed by the setup cells.\n",
+        "new_value = df_train['total_parcel_tax'] - df_train['land_tax']\n",
+        "conditions = (df_train.total_parcel_tax>0) & (df_train.land_tax>0)\n",
+        "df_train['structure_tax'] = new_value.where(conditions, df_train['structure_tax'])\n",
+        "\n",
+        "# where structure tax is 0 we do not know the structure tax, so insert a NaN value\n",
+        "# (scalar fill, so masked_assign is fine here)\n",
+        "conditions = df_train['structure_tax'] == 0\n",
+        "df_train['structure_tax'] = df_train['structure_tax'].masked_assign(cupy.nan, conditions)\n",
+        "\n",
+        "# SWITCH TO OG -- pandas version of the two assignments above, kept for reference:\n",
+        "# df_train.loc[(df_train.total_parcel_tax>0) & (df_train.land_tax>0),'structure_tax']=df_train['total_parcel_tax']-df_train['land_tax']\n",
+        "# structure_tax, i see a lot of structure tax is 0's, those must be NA's\n",
+        "# df_train.loc[df_train.structure_tax==0,'structure_tax']=np.nan\n",
+        "\n",
+        "# missing-value counts for the four tax columns after the rebuild\n",
+        "print(df_train.total_property_tax_2016.isnull().sum())\n",
+        "print(df_train.structure_tax.isnull().sum())\n",
+        "print(df_train.total_parcel_tax.isnull().sum())\n",
+        "print(df_train.land_tax.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "90274 rows where total and land are greater than 0\n",
+            "90275 total rows, hopefully the same as above number\n",
+            "\n",
+            "379\n",
+            "\n",
+            "    parcelid               logerror  ac_id  basement_sqft  total_bath  bedroomcnt  buildingqualitytypeid ...  census_tractnumber\n",
+            "266  17188959                 0.0944                   0.0         0.0         0.0                        ...             0056.00\n",
+            "297  12956410   -0.14850000000000002                   0.0         0.0         0.0                        ...             4080.05\n",
+            "336  12966610                 0.0488                   0.0         6.0         9.0                    7.0 ...             4303.01\n",
+            "454  17188961                  0.003                   0.0         0.0         0.0                        ...             0056.00\n",
+            "474  17188974    0.10260000000000001                   0.0         0.0         0.0                        ...             0056.00\n",
+            "555  17266056                -0.5175                   0.0         0.0         0.0                        ...             0059.08\n",
+            "601  17205423                 0.0733                   0.0         0.0         0.0                        ...             0076.06\n",
+            "790  10858080    0.05450000000000001                   0.0         2.0         3.0                    7.0 ...             1412.01\n",
+            "791  10858080    0.08620000000000001                   0.0         2.0         3.0                    7.0 ...             1412.01\n",
+            "976  11325190  -0.024300000000000002                   0.0         0.0         0.0                        ...             9102.06\n",
+            "[369 more rows]\n",
+            "[38 more columns]\n",
+            "\n",
+            "0 total taxes are less than same rows land tax\n",
+            "\n",
+            "Empty DataFrame\n",
+            "Columns: ['parcelid', 'logerror', 'ac_id', 'basement_sqft', 'total_bath', 'bedroomcnt', 'buildingqualitytypeid', 'census_tractnumber']\n",
+            "Index: []\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-93-bdfcd5900cba>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[0mnew_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_parcel_tax'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'land_tax'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0mconditions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal_parcel_tax\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mland_tax\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'structure_tax'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'structure_tax'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconditions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m \u001b[0;31m# # where structure tax is 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mmasked_assign\u001b[0;34m(self, value, mask)\u001b[0m\n\u001b[1;32m   1073\u001b[0m         \"\"\"\n\u001b[1;32m   1074\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1075\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1076\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_copy_construct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1077\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36mmasked_assign\u001b[0;34m(self, value, mask)\u001b[0m\n\u001b[1;32m    494\u001b[0m             \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_gpu_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    495\u001b[0m             \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmask_invert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 496\u001b[0;31m             \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    497\u001b[0m         )\n\u001b[1;32m    498\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnull_count\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/utils/cudautils.py\u001b[0m in \u001b[0;36mfill_mask\u001b[0;34m(data, mask, value)\u001b[0m\n\u001b[1;32m    235\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    236\u001b[0m         \u001b[0mconfigured\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgpu_fill_masked\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 237\u001b[0;31m         \u001b[0mconfigured\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    238\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    222\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAutoJitCUDAKernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    225\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    226\u001b[0m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mspecialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    761\u001b[0m         '''\n\u001b[1;32m    762\u001b[0m         argtypes = tuple(\n\u001b[0;32m--> 763\u001b[0;31m             [self.typingctx.resolve_argument_type(a) for a in args])\n\u001b[0m\u001b[1;32m    764\u001b[0m         \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    765\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    761\u001b[0m         '''\n\u001b[1;32m    762\u001b[0m         argtypes = tuple(\n\u001b[0;32m--> 763\u001b[0;31m             [self.typingctx.resolve_argument_type(a) for a in args])\n\u001b[0m\u001b[1;32m    764\u001b[0m         \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    765\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/typing/context.py\u001b[0m in \u001b[0;36mresolve_argument_type\u001b[0;34m(self, val)\u001b[0m\n\u001b[1;32m    296\u001b[0m         \"\"\"\n\u001b[1;32m    297\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 298\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mtypeof\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPurpose\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margument\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    299\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    300\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mnumba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_cuda_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/typing/typeof.py\u001b[0m in \u001b[0;36mtypeof\u001b[0;34m(val, purpose)\u001b[0m\n\u001b[1;32m     32\u001b[0m         msg = _termcolor.errmsg(\n\u001b[1;32m     33\u001b[0m             \"cannot determine Numba type of %r\") % (type(val),)\n\u001b[0;32m---> 34\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     35\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mValueError\u001b[0m: cannot determine Numba type of <class 'cudf.dataframe.series.Series'>"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "8SID48LOpYvu",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# regionidcounty is an exact copy of the fips code, so drop the duplicate column\n",
+        "duplicate_columns = ['regionidcounty']\n",
+        "df_train = df_train.drop(duplicate_columns, axis=1)\n",
+        "# show the frame's dimensions after the drop\n",
+        "df_train.shape"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "tWmM2J8_pkg1",
+        "colab_type": "code",
+        "outputId": "2393cbab-218f-4849-c32c-700495dfb18e",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 622
+        }
+      },
+      "source": [
+        "#*******************************\n",
+        "# bedroomcnt: 1421 houses with zero bedrooms ??\n",
+        "# observed that when it is missing, all the other room counts are missing too\n",
+        "print(df_train['bedroomcnt'].value_counts())\n",
+        "\n",
+        "# treat a bedroom count of 0 as unknown and overwrite it with NaN\n",
+        "conditions = df_train['bedroomcnt'] == 0\n",
+        "df_train['bedroomcnt'] = df_train['bedroomcnt'].masked_assign(cupy.nan, conditions)\n",
+        "\n",
+        "# NOTE(review): in the saved run the value_counts() call below died inside the sort\n",
+        "# with 'illegal memory access'; the cause is not established from this cell alone.\n",
+        "print(df_train['bedroomcnt'].value_counts())\n",
+        "print(df_train['bedroomcnt'].isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "3.0    35447\n",
+            "2.0    22357\n",
+            "4.0    20279\n",
+            "5.0     5077\n",
+            "1.0     3897\n",
+            "0.0     1421\n",
+            "6.0     1120\n",
+            "8.0      274\n",
+            "7.0      234\n",
+            "9.0       91\n",
+            "[7 more rows]\n",
+            "dtype: int64\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "error",
+          "ename": "RuntimeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-94-29ba50e2a85d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, method, sort)\u001b[0m\n\u001b[1;32m   1827\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnull_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1828\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1829\u001b[0;31m         \u001b[0mvals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcnts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1830\u001b[0m         \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcnts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1831\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/numerical.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, method)\u001b[0m\n\u001b[1;32m    215\u001b[0m             \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"non sort based value_count() not implemented yet\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    216\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m         \u001b[0msegs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msortedvals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unique_segments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    218\u001b[0m         \u001b[0;31m# Return both values and their counts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m         \u001b[0mout_vals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcpp_copying\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_gather_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msortedvals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msegs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36m_unique_segments\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    684\u001b[0m         \u001b[0mdensecol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dense_buffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    685\u001b[0m         \u001b[0;31m# sort the column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 686\u001b[0;31m         \u001b[0msortcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdensecol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_by_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    687\u001b[0m         \u001b[0;31m# find segments\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    688\u001b[0m         \u001b[0msortedvals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msortcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/numerical.py\u001b[0m in \u001b[0;36msort_by_values\u001b[0;34m(self, ascending, na_position)\u001b[0m\n\u001b[1;32m    161\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0msort_by_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"last\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m         \u001b[0msort_inds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_sorted_inds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    164\u001b[0m         \u001b[0mcol_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcpp_copying\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_gather_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort_inds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    165\u001b[0m         col_inds = self.replace(\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/_sort.py\u001b[0m in \u001b[0;36mget_sorted_inds\u001b[0;34m(by, ascending, na_position)\u001b[0m\n\u001b[1;32m     77\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Must use a boolean or list of booleans\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m     \u001b[0mcpp_sort\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_order_by\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol_inds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mcol_inds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32mcudf/bindings/sort.pyx\u001b[0m in \u001b[0;36mcudf.bindings.sort.apply_order_by\u001b[0;34m()\u001b[0m\n",
+            "\u001b[0;32mcudf/bindings/sort.pyx\u001b[0m in \u001b[0;36mcudf.bindings.sort.apply_order_by\u001b[0;34m()\u001b[0m\n",
+            "\u001b[0;31mRuntimeError\u001b[0m: merge_sort: failed to synchronize: an illegal memory access was encountered"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3qnP2L9LpmeJ",
+        "colab_type": "code",
+        "outputId": "bc0119de-0644-414f-bf59-bd132c7c0e15",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 387
+        }
+      },
+      "source": [
+        "# propertylandusetypeid & total living area\n",
+        "# counts recorded for the room columns (presumably missing values -- TODO confirm):\n",
+        "#   total_bath   1165\n",
+        "#   full_bath    1182\n",
+        "#   half_bath    1182\n",
+        "#   bedroomcnt   1421\n",
+        "#   roomcnt      1416\n",
+        "\n",
+        "# roomcnt    = (full_bath + half_bath) + bedroomcnt\n",
+        "# total_bath = full_bath + 0.5 * (half_bath)\n",
+        "\n",
+        "# calculate full bath and half bath again from total bath, as it has a few extra columns\n",
+        "# (fixes 500 missing values in roomcnt)\n",
+        "\n",
+        "# where full & half bath and bedroom count are not null, but room count is null\n",
+        "conditions = (df_train['full_bath'].notna()\n",
+        "              & df_train['half_bath'].notna()\n",
+        "              & df_train['bedroomcnt'].notna()\n",
+        "              & df_train['roomcnt'].isna())\n",
+        "# calculate room count including all full & half baths along with bedroom count\n",
+        "new_values = df_train.full_bath + df_train.half_bath + df_train.bedroomcnt\n",
+        "# masked_assign hands its value straight to a GPU fill kernel and rejects a Series\n",
+        "# (ValueError: cannot determine Numba type of ... Series), so use where() to take the\n",
+        "# computed count on the selected rows and keep the existing roomcnt everywhere else.\n",
+        "# NOTE(review): needs a cudf release that provides Series.where -- confirm against\n",
+        "# the RAPIDS version installed by the setup cells.\n",
+        "df_train['roomcnt'] = new_values.where(conditions, df_train['roomcnt'])\n",
+        "\n",
+        "# pandas version of the assignment above, kept for reference:\n",
+        "# df_train.loc[(df_train.full_bath.notnull())\n",
+        "#              & (df_train.half_bath.notnull())\n",
+        "#              & (df_train.bedroomcnt.notnull())\n",
+        "#              & (df_train.roomcnt.isnull()),['roomcnt']]=df_train.full_bath + df_train.half_bath + df_train.bedroomcnt\n",
+        "\n",
+        "# most bedroom count and roomcount null are in same place\n",
+        "# all column null count 1133 all columns are null\n",
+        "\n",
+        "# missing-value counts for the bath / room columns after the backfill\n",
+        "print(df_train.total_bath.isnull().sum())\n",
+        "print(df_train.full_bath.isnull().sum())\n",
+        "print(df_train.half_bath.isnull().sum())\n",
+        "print(df_train.bedroomcnt.isnull().sum())\n",
+        "print(df_train.roomcnt.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "ERROR:Call to cuOccupancyMaxPotentialBlockSize results in UNKNOWN_CUDA_ERROR\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "error",
+          "ename": "CudaAPIError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mCudaAPIError\u001b[0m                              Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-95-d46f327c0313>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'half_bath'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bedroomcnt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m               & (df_train['roomcnt'].isna() == True))\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;31m# calculate room count including all full & half baths along with bedroom count\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_bath\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhalf_bath\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36misna\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1238\u001b[0m         \"\"\"Identify missing values in a Series. Alias for isnull.\n\u001b[1;32m   1239\u001b[0m         \"\"\"\n\u001b[0;32m-> 1240\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1242\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mnotna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36misnull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1232\u001b[0m             )\n\u001b[1;32m   1233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1234\u001b[0;31m         \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcudautils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnullmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1235\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1236\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/utils/cudautils.py\u001b[0m in \u001b[0;36misnull_mask\u001b[0;34m(data, mask)\u001b[0m\n\u001b[1;32m    432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    433\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 434\u001b[0;31m         \u001b[0mgpu_isnull\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    435\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    436\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    226\u001b[0m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 228\u001b[0;31m         \u001b[0mtpb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_thread_per_block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    229\u001b[0m         \u001b[0mtpbm1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtpb\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    230\u001b[0m         \u001b[0mblkct\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mntasks\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtpbm1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mtpb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m_compute_thread_per_block\u001b[0;34m(self, kernel)\u001b[0m\n\u001b[1;32m    249\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    250\u001b[0m                 \u001b[0;31m# Raises from the driver if the feature is unavailable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 251\u001b[0;31m                 \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtpb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_max_potential_block_size\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    252\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    253\u001b[0m                 \u001b[0;31m# Fallback to table-based approach.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36mget_max_potential_block_size\u001b[0;34m(self, func, b2d_func, memsize, blocksizelimit, flags)\u001b[0m\n\u001b[1;32m    646\u001b[0m                                                     \u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    647\u001b[0m                                                     \u001b[0mb2d_cb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 648\u001b[0;31m                                                     memsize, blocksizelimit)\n\u001b[0m\u001b[1;32m    649\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    650\u001b[0m             driver.cuOccupancyMaxPotentialBlockSizeWithFlags(byref(gridsize), byref(blocksize),\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36msafe_cuda_api_call\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0m_logger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'call driver api: %s'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibfn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    289\u001b[0m             \u001b[0mretcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretcode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    291\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0msafe_cuda_api_call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36m_check_error\u001b[0;34m(self, fname, retcode)\u001b[0m\n\u001b[1;32m    323\u001b[0m                     \u001b[0m_logger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcritical\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_getpid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    324\u001b[0m                     \u001b[0;32mraise\u001b[0m \u001b[0mCudaDriverError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CUDA initialized before forking\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 325\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mCudaAPIError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mretcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    326\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    327\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mget_device\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevnum\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mCudaAPIError\u001b[0m: [700] Call to cuOccupancyMaxPotentialBlockSize results in UNKNOWN_CUDA_ERROR"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Mvy51Ckev9CX",
+        "colab_type": "text"
+      },
+      "source": [
+        "- correct number of stories by Zillow's `propertylandusetypeid` indicator\n",
+        "  - where values are not null\n",
+        "    - number of stories can be set to mode\n",
+        "  - where there are null values\n",
+        "    - number of stories can be set to the generally accepted number of stories"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "IW4CG2InpolD",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# before\n",
+        "print(df_train.numberofstories.isnull().sum())\n",
+        "print(df_train.numberofstories.value_counts())\n",
+        "\n",
+        "#numberofstories\t69705\n",
+        "\n",
+        "# store ids and general number of stories \n",
+        "zillow_type_ids = [(31,2), (246,2), (247,2), (248,2), (260,2), (261,1), \n",
+        "                   (263,1), (266,1), (267,1), (269, 2), (275,1)]\n",
+        "\n",
+        "# go through each (land use type id, typical number of stories) pair\n",
+        "for land_use_id, n_stories in zillow_type_ids:\n",
+        "  # unpacking directly in the loop header avoids shadowing the builtin `id`\n",
+        "\n",
+        "  # when type id matches and story count is not null\n",
+        "  conditions = ((df_train['propertylandusetypeid'] == land_use_id) \n",
+        "                & (df_train['numberofstories'].isna() == False))\n",
+        "  # calculate the mode story count for matching id properties\n",
+        "  # NOTE(review): mode() presumably returns a Series - confirm masked_assign accepts it\n",
+        "  mode_stories = df_train.loc[conditions, 'numberofstories'].mode()\n",
+        "  # and set those non null values to the most common value seen\n",
+        "  df_train['numberofstories'] = df_train['numberofstories'].masked_assign(mode_stories, \n",
+        "                                                                          conditions)\n",
+        "  \n",
+        "  # when type id matches and story count is null\n",
+        "  # (the mask must select the NULL rows; selecting the non null rows again would\n",
+        "  #  overwrite the mode assigned above and leave every missing value unfilled)\n",
+        "  conditions = ((df_train['propertylandusetypeid'] == land_use_id) \n",
+        "                & (df_train['numberofstories'].isna() == True))\n",
+        "  # set null values to the common number of stories seen in that type id\n",
+        "  df_train['numberofstories'] = df_train['numberofstories'].masked_assign(n_stories, \n",
+        "                                                                          conditions)\n",
+        "  \n",
+        "# TO BE ADDRESSED\n",
+        "# #https://en.wikipedia.org/wiki/Townhouse , typical town house are usually large, and has atleast 6 rooms\n",
+        "# df_train.loc[(df_train.propertylandusetypeid==264) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "\"\"\"\n",
+        "df_train.loc[(df_train.propertylandusetypeid==246) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==246) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==247) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==247) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==248) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==248) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==260) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==260) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==261) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==261) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==263) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==263) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==266) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==266) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "\n",
+        "df_train.loc[(df_train.propertylandusetypeid==269) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==269) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "prop2016.loc[(prop2016.propertylandusetypeid==275) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==275) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "\n",
+        "prop2016.loc[(prop2016.propertylandusetypeid==267) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==267) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "\n",
+        "#https://en.wikipedia.org/wiki/Townhouse , typical town house are usually large, and has atleast 6 rooms\n",
+        "df_train.loc[(df_train.propertylandusetypeid==264) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
+        "\n",
+        "prop2016.loc[(prop2016.propertylandusetypeid==31) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
+        "df_train.loc[(df_train.propertylandusetypeid==31) & (df_train.numberofstories.isnull()),'numberofstories']=2\"\"\"\n",
+        "\n",
+        "# after\n",
+        "print(df_train.numberofstories.isnull().sum())\n",
+        "print(df_train.numberofstories.value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AHcMsDCxprd4",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "\"\"\"skeptical of this cell (and the one above)..\n",
+        "the original author provides no explanation for using the mode\"\"\"\n",
+        "\n",
+        "# before\n",
+        "print(df_train.fireplace_count.isnull().sum())\n",
+        "print(df_train.fireplace_count.value_counts())\n",
+        "\n",
+        "# where there is a fire place, and count is not null\n",
+        "conditions = ((df_train.fireplaceflag==1) \n",
+        "              & (df_train.fireplace_count.isna() == False))\n",
+        "# calculate the mode fireplace count \n",
+        "mode_fire_count = df_train.loc[conditions, 'fireplace_count'].mode()\n",
+        "# and set those non null values to the most common fireplace count\n",
+        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(mode_fire_count, \n",
+        "                                                                        conditions)\n",
+        "\n",
+        "# where there is a fire place, and count is null\n",
+        "conditions = ((df_train.fireplaceflag==1) \n",
+        "              & (df_train.fireplace_count.isna() == True))\n",
+        "# set null values to the most common fireplace count\n",
+        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(1, \n",
+        "                                                                        conditions)\n",
+        "\n",
+        "# df_train.loc[(df_train.fireplaceflag==1) & (df_train.fireplace_count.notnull()),'fireplace_count'].mode()\n",
+        "# df_train.loc[(df_train.fireplaceflag==1) & (df_train.fireplace_count.isnull()),'fireplace_count']=1\n",
+        "\n",
+        "# after\n",
+        "print(df_train.fireplace_count.isnull().sum())\n",
+        "print(df_train.fireplace_count.value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DVgF1c_p_bN1",
+        "colab_type": "text"
+      },
+      "source": [
+        "# -----current: break-----"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FIuSWoJspt3H",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import seaborn as sns\n",
+        "color = sns.color_palette()\n",
+        "sns.set(style=\"darkgrid\")\n",
+        "\n",
+        "# one bar per buildingqualitytypeid value, height = number of rows with that value\n",
+        "ax = sns.countplot(x=\"buildingqualitytypeid\", data=df_train)\n",
+        "\n",
+        "plt.xticks(rotation='vertical')\n",
+        "# the title names the column that is actually plotted (it previously said 'Bathroom count')\n",
+        "plt.title(\"Frequency of building quality type id\", fontsize=15)\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "KOHPCFRSp5y9",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "plt.plot(df_train.yearbuilt,df_train.buildingqualitytypeid , 'ro')\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_647tI5Lp94v",
+        "colab_type": "text"
+      },
+      "source": [
+        "### Final adjustments\n",
+        "- filling nans"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-4A3-sjRp8AE",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#location seems to be related to building quality, (knnclassifier)\n",
+        "\n",
+        "def fillna_knn( df, base, target):\n",
+        "    \"\"\"Impute a categorical column with a grid-searched KNN classifier.\n",
+        "\n",
+        "    The classifier is fitted on the rows of `df` where `target` is present,\n",
+        "    using the `base` columns as features, and then predicts `target` for\n",
+        "    the rows where it is null. `df` itself is not modified.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    df : DataFrame containing the `base` and `target` columns\n",
+        "    base : list of feature column names\n",
+        "    target : name of the column whose nulls are predicted\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    Predictions for the rows of `df` whose `target` is null, in row order.\n",
+        "    \"\"\"\n",
+        "    data_colnames = [ target ] + base\n",
+        "    missing_values_boolflag = df[target].isnull() # True for rows where target is missing\n",
+        "    not_missing_boolflag = ~missing_values_boolflag \n",
+        "    number_of_missing_val = missing_values_boolflag.sum()\n",
+        "    print(\"# of miss\",number_of_missing_val)\n",
+        "    not_missing_rows = df.loc[ not_missing_boolflag, data_colnames ]\n",
+        "    Y = not_missing_rows[target]\n",
+        "    X = not_missing_rows[base]\n",
+        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192,stratify=Y)\n",
+        "    metrics       = ['euclidean'] \n",
+        "    weights       = ['distance'] \n",
+        "    numNeighbors  = [5,10,15,20,25]\n",
+        "    param_grid    = dict(metric=metrics,weights=weights,n_neighbors=numNeighbors)\n",
+        "    # shuffle=False already makes the folds deterministic; passing random_state\n",
+        "    # together with shuffle=False is rejected by newer scikit-learn releases\n",
+        "    cv            = StratifiedKFold(n_splits=3,shuffle=False)\n",
+        "    grid = GridSearchCV(neighbors.KNeighborsClassifier(n_jobs=-1),param_grid=param_grid,cv=cv,scoring='f1_weighted',refit=True,return_train_score=True,verbose=1,n_jobs=-1,pre_dispatch='n_jobs')\n",
+        "    grid.fit(X_train ,Y_train)\n",
+        "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
+        "    print(\"grid.best_params_\",grid.best_params_)\n",
+        "    print(\"grid.scorer_\",grid.scorer_)\n",
+        "    # report the refitted model on the held-out 20% instead of discarding its predictions\n",
+        "    print(\"held-out score\",grid.score(X_test, Y_test))\n",
+        "    \n",
+        "    # predict only the rows whose target is missing; the caller writes them back\n",
+        "    Z = grid.predict(df.loc[missing_values_boolflag, base])\n",
+        "    return Z"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uCyRxp-7qEXf",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "temp['buildingqualitytypeid']=temp['buildingqualitytypeid'].fillna(-1)\n",
+        "temp=temp.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].replace(-1,np.nan)\n",
+        "print(temp.buildingqualitytypeid.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "\n",
+        "missing_values=fillna_knn(temp,\n",
+        "                  base = [ 'latitude', 'longitude' ] ,\n",
+        "                  target = 'buildingqualitytypeid')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['buildingqualitytypeid'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'buildingqualitytypeid' ]  = missing_values\n",
+        "\n",
+        "print(df_train.buildingqualitytypeid.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oTh_XPErqkHf",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(df_train.heating_system_id.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "temp['heating_system_id']=temp['heating_system_id'].fillna(-1)\n",
+        "temp=temp.groupby(\"heating_system_id\").filter(lambda x: x.heating_system_id.size > 3)\n",
+        "temp['heating_system_id'] = temp['heating_system_id'].replace(-1,np.nan)\n",
+        "print(temp.heating_system_id.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "\n",
+        "missing_values=fillna_knn(temp,\n",
+        "                  base = [ 'latitude', 'longitude' ] ,\n",
+        "                  target = 'heating_system_id')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['heating_system_id'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'heating_system_id' ]  = missing_values\n",
+        "\n",
+        "\n",
+        "print(df_train.heating_system_id.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oVjNSkUYqnCt",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(df_train.ac_id.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "temp['ac_id']=temp['ac_id'].fillna(-1)\n",
+        "temp=temp.groupby(\"ac_id\").filter(lambda x: x.ac_id.size > 3)\n",
+        "temp['ac_id'] = temp['ac_id'].replace(-1,np.nan)\n",
+        "print(temp.ac_id.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "\n",
+        "missing_values=fillna_knn(temp,\n",
+        "                  base = [ 'latitude', 'longitude' ] ,\n",
+        "                  target = 'ac_id')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['ac_id'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'ac_id' ]  = missing_values\n",
+        "\n",
+        "print(df_train.ac_id.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qTbcYbexqr0Y",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#yearbuilt\n",
+        "print(df_train.yearbuilt.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "temp['yearbuilt']=temp['yearbuilt'].fillna(-1)\n",
+        "temp=temp.groupby(\"yearbuilt\").filter(lambda x: x.yearbuilt.size > 3)\n",
+        "temp['yearbuilt'] = temp['yearbuilt'].replace(-1,np.nan)\n",
+        "print(temp.yearbuilt.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "\n",
+        "missing_values=fillna_knn(temp,\n",
+        "                  base = [ 'latitude', 'longitude','buildingqualitytypeid','propertylandusetypeid' ] ,\n",
+        "                  target = 'yearbuilt')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['yearbuilt'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'yearbuilt' ]  = missing_values\n",
+        "print(df_train.yearbuilt.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Gx1LYGmfqxLk",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#location seems to be related to building quality, (knnregressor)\n",
+        "from sklearn.model_selection import KFold\n",
+        "\n",
+        "def fillna_knnr( df, base, target):\n",
+        "    \"\"\"Impute a numeric column with a grid-searched KNN regressor.\n",
+        "\n",
+        "    The regressor is fitted on the rows of `df` where `target` is present,\n",
+        "    using the `base` columns as features, and then predicts `target` for\n",
+        "    the rows where it is null. `df` itself is not modified.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    df : DataFrame containing the `base` and `target` columns\n",
+        "    base : list of feature column names\n",
+        "    target : name of the column whose nulls are predicted\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    Predictions for the rows of `df` whose `target` is null, in row order.\n",
+        "    \"\"\"\n",
+        "    data_colnames = [ target ] + base\n",
+        "    missing_values_boolflag = df[target].isnull() # True for rows where target is missing\n",
+        "    not_missing_boolflag = ~missing_values_boolflag \n",
+        "    number_of_missing_val = missing_values_boolflag.sum()\n",
+        "    print(\"# of miss\",number_of_missing_val)\n",
+        "    not_missing_rows = df.loc[ not_missing_boolflag, data_colnames]\n",
+        "    Y = not_missing_rows[target]\n",
+        "    X = not_missing_rows[base]\n",
+        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192)\n",
+        "    metrics       = ['euclidean'] \n",
+        "    weights       = ['distance'] \n",
+        "    numNeighbors  = [5,10,15,20,25]\n",
+        "    param_grid    = dict(metric=metrics,weights=weights,n_neighbors=numNeighbors)\n",
+        "    # shuffle=False already makes the folds deterministic; passing random_state\n",
+        "    # together with shuffle=False is rejected by newer scikit-learn releases\n",
+        "    cv            = KFold(n_splits=3,shuffle=False) \n",
+        "    grid = GridSearchCV(neighbors.KNeighborsRegressor(n_jobs=-1),param_grid=param_grid,cv=cv,scoring='neg_mean_absolute_error',refit=True,return_train_score=True,verbose=1,n_jobs=-1,pre_dispatch='n_jobs')\n",
+        "    grid.fit(X_train ,Y_train)\n",
+        "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
+        "    print(\"grid.best_params_\",grid.best_params_)\n",
+        "    print(\"grid.scorer_\",grid.scorer_)\n",
+        "    # report the refitted model on the held-out 20% instead of discarding its predictions\n",
+        "    print(\"held-out score\",grid.score(X_test, Y_test))\n",
+        "    # predict only the rows whose target is missing; the caller writes them back\n",
+        "    Z = grid.predict(df.loc[missing_values_boolflag, base])\n",
+        "    return Z"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pj5PXm7ozg5l",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#garage_sqft\n",
+        "print(df_train.garage_sqft.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "# only properties with at least one garage space are used to fit and to impute\n",
+        "has_garage = df_train.garagecarcnt>0\n",
+        "temp=df_train.loc[has_garage,df_train.columns].copy()\n",
+        "\n",
+        "print(temp.garage_sqft.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = [ 'latitude', 'longitude','garagecarcnt'] ,\n",
+        "                  target = 'garage_sqft')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "# write back to exactly the rows that were predicted: null garage_sqft AND garagecarcnt>0.\n",
+        "# Masking on isnull() alone can select more rows than there are predictions.\n",
+        "missing_values_boolflag = df_train['garage_sqft'].isnull() & has_garage\n",
+        "df_train.loc[ missing_values_boolflag, 'garage_sqft' ] = missing_values\n",
+        "print(df_train.garage_sqft.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "b7e5CFTyzg_M",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "df_train = df_train.drop('parcelid', axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "YxGquCOOzhD7",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#All the other columns with missing values seem to be integer and will need regression to be imputed;\n",
+        "#time to get the categorical variables one-hot encoded\n",
+        "\n",
+        "#Identify numerical columns to produce a heatmap\n",
+        "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips', 'heating_system_id','has_hottub_or_spa',\n",
+        "          'just_hottub_or_spa', 'pool_with_spa_tub_yes','pool_with_spa_tub_no','propertylandusetypeid','basement_flag'\n",
+        "          ,'fireplaceflag','taxdelinquencyflag']\n",
+        "numcols = [x for x in df_train.columns if x not in catcols]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uVZkszJEzhHj",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#total_finished_living_area_sqft\n",
+        "\n",
+        "print(df_train.total_finished_living_area_sqft.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.total_finished_living_area_sqft.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = [ 'latitude', 'longitude','basementsqft','numberofstories','poolcnt','garagecarcnt','garage_sqft','propertylandusetypeid'] ,\n",
+        "                  target = 'total_finished_living_area_sqft')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['total_finished_living_area_sqft'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'total_finished_living_area_sqft' ] = missing_values\n",
+        "print(df_train.total_finished_living_area_sqft.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "CVrTMb92zhLX",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#total_bath\t1165\n",
+        "#full_bath\t1182\n",
+        "#half_bath\t1182\n",
+        "#roomcnt\t1416\n",
+        "#bedroomcnt\t1421\n",
+        "\n",
+        "#total_bath\n",
+        "# (run once: a second pass would re-fit the grid search and then predict on zero rows)\n",
+        "\n",
+        "print(df_train.total_bath.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.total_bath.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = ['propertylandusetypeid','total_finished_living_area_sqft' ] ,\n",
+        "                  target = 'total_bath')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['total_bath'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'total_bath' ] = missing_values\n",
+        "print(df_train.total_bath.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BjIKlu-tzhPI",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# drop half_bath and full_bath, as they only hold values redundant with total_bath\n",
+        "df_train = df_train.drop(['full_bath','half_bath'], axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "02X1y6EBzhT9",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#bedroomcnt\t1421\n",
+        "\n",
+        "print(df_train.bedroomcnt.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.bedroomcnt.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = ['propertylandusetypeid','total_finished_living_area_sqft','total_bath' ] ,\n",
+        "                  target = 'bedroomcnt')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['bedroomcnt'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'bedroomcnt' ] = missing_values\n",
+        "print(df_train.bedroomcnt.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "WzkZ_qeHzhXP",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "df_train['total_bath']=df_train.total_bath.round(1)\n",
+        "df_train['bedroomcnt']=df_train.bedroomcnt.round(1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "QF9DtDAczhaW",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#recalculate roomcnt\t1416 as we have used imputation for total_bath and bedroomcnt\n",
+        "\n",
+        "df_train.loc[(df_train.roomcnt.isnull()),['roomcnt']]=df_train.total_bath + df_train.bedroomcnt"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "U5N41TBlz60W",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(df_train.shape)\n",
+        "df_train =df_train.loc[(df_train.total_parcel_tax.notnull()) & (df_train.land_tax.notnull()),df_train.columns]\n",
+        "\n",
+        "print(df_train.shape)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kv9h5yL3z64Q",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#lot_area_sqft\n",
+        "print(df_train.lot_area_sqft.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.lot_area_sqft.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = ['latitude','longitude','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+        "                  target = 'lot_area_sqft')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['lot_area_sqft'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'lot_area_sqft' ] = missing_values.round(2)\n",
+        "print(df_train.lot_area_sqft.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GYJLHrR4z68f",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# predict structure_tax and recalculate  total_parcel_tax = land_tax + structure_tax\n",
+        "\n",
+        "\n",
+        "print(df_train.structure_tax.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.structure_tax.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+        "                  target = 'structure_tax')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['structure_tax'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'structure_tax' ] = missing_values.round(2)\n",
+        "print(df_train.structure_tax.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Ya-3K06Zz6_y",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#36 total_property_tax_2016 \n",
+        "\n",
+        "#total_parcel_tax = land_tax + structure_tax\n",
+        "    \n",
+        "df_train['total_parcel_tax']=df_train['structure_tax']+df_train['land_tax']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "8Fvr7voVz7DX",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#age of the property\n",
+        "df_train['age'] = 2016 - df_train['yearbuilt']\n",
+        "df_train=df_train.drop(['yearbuilt'],axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xl0EOIT-z7Gl",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#total_property_tax_2016\n",
+        "\n",
+        "\n",
+        "print(df_train.total_property_tax_2016.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "temp=df_train.copy()\n",
+        "print(temp.total_property_tax_2016.isnull().sum())\n",
+        "print(temp.shape)\n",
+        "missing_values=fillna_knnr(temp,\n",
+        "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+        "                  target = 'total_property_tax_2016')\n",
+        "\n",
+        "print(\"predicted output shape\",missing_values.shape)\n",
+        "missing_values_boolflag = df_train['total_property_tax_2016'].isnull()\n",
+        "df_train.loc[ missing_values_boolflag, 'total_property_tax_2016' ] = missing_values.round(2)\n",
+        "print(df_train.total_property_tax_2016.isnull().sum())"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "YlaxWegqz7I-",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#check missing values\n",
+        "\n",
+        "missing_df = df_train.isnull().sum(axis=0).reset_index()\n",
+        "missing_df.columns = ['column_name', 'missing_count']\n",
+        "missing_df = missing_df.loc[missing_df['missing_count']>0]\n",
+        "missing_df = missing_df.sort_values(by='missing_count')\n",
+        "print(missing_df)\n",
+        "print(missing_df.shape)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "dIl_nqKVz7NQ",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#both of the columns above are missing 92% of their data and there is no related variable to impute them from, hence dropping them at this point\n",
+        "\n",
+        "df_train = df_train.drop(['finished_living_area_entryfloor_sqft2','finished_living_area_entryfloor_sqft1'], axis=1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "HQJd7rgKz7Qq",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#Identify numerical columns to produce a heatmap\n",
+        "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips','pool_with_spa_tub_no','pool_with_spa_tub_yes','has_hottub_or_spa',\n",
+        "           'just_hottub_or_spa','heating_system_id','propertylandusetypeid','basement_flag','fireplaceflag','taxdelinquencyflag']\n",
+        "numcols = [x for x in df_train.columns if x not in catcols]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "VUN3a6uJz7Ut",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# 2 variables are of object datatype, converting them to numeric\n",
+        "df_train[['census_tractnumber','block_number']] = df_train[['census_tractnumber','block_number']].apply(pd.to_numeric)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zGx77rRAz7ZZ",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# dropping categorical columns as xgboost feature selection cannot handle them\n",
+        "\n",
+        "train_x = df_train.drop(catcols+['logerror'], axis=1)\n",
+        "\n",
+        "train_y=df_train['logerror']\n",
+        "\n",
+        "train_x = train_x.astype(float) \n",
+        "train_y = train_y.astype(float)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "es_Ew2YJz7dT",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "pd.options.display.max_rows = 65\n",
+        "\n",
+        "dtype_df = train_x.dtypes.reset_index()\n",
+        "dtype_df.columns = [\"Count\", \"Column Type\"]\n",
+        "#dtype_df"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bvWIhR38z7fW",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "df_train.loc[df_train.has_hottub_or_spa==True,'has_hottub_or_spa']=\"Yes\"\n",
+        "df_train.loc[df_train.has_hottub_or_spa==0,'has_hottub_or_spa']=\"No\"\n",
+        "\n",
+        "df_train.loc[df_train.just_hottub_or_spa==0,'just_hottub_or_spa']=\"No\"\n",
+        "df_train.loc[df_train.just_hottub_or_spa==1,'just_hottub_or_spa']=\"Yes\"\n",
+        "\n",
+        "df_train.loc[df_train.deck_flag==0,'deck_flag']=\"No\"\n",
+        "df_train.loc[df_train.deck_flag==1,'deck_flag']=\"Yes\"\n",
+        "\n",
+        "df_train.loc[df_train.basement_flag==0,'basement_flag']=\"No\"\n",
+        "df_train.loc[df_train.basement_flag==1,'basement_flag']=\"Yes\"\n",
+        "\n",
+        "df_train.loc[df_train.fireplaceflag==False,'fireplaceflag']=\"No\"\n",
+        "df_train.loc[df_train.fireplaceflag==True,'fireplaceflag']=\"Yes\"\n",
+        "#"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Ef9JjrmMz7jw",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#ac_id,heating_system_id,propertylandusetypeid\n",
+        "dummieslist=['has_hottub_or_spa','just_hottub_or_spa',\n",
+        "             'deck_flag','fips','basement_flag','fireplaceflag','taxdelinquencyflag']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Z51Zrt2Uz7oD",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "df_train[dummieslist] = df_train[dummieslist].astype(object)\n",
+        "dummies = pd.get_dummies(df_train[dummieslist], prefix= dummieslist)\n",
+        "dummies.shape"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "VHBi5Gg6z7tu",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "dummies2=['pool_with_spa_tub_no','pool_with_spa_tub_yes']\n",
+        "df_train[dummies2] = df_train[dummies2].astype(int)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oocTPKI9z7rk",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import os\n",
+        "import MySQLdb\n",
+        "from sqlalchemy import create_engine\n",
+        "# Read the connection URL (mysql+mysqldb://user:password@host/db) from the environment so no password is committed with the notebook\n",
+        "engineString = os.environ['ZILLOW_DB_URL']\n",
+        "engine = create_engine(engineString)\n",
+        "with engine.connect() as con, con.begin():\n",
+        "    df_train.to_sql('df_train_f1', con, chunksize=10000, index =False,if_exists ='replace')"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zj5ZLSPlz7XC",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "numcols2=['basementsqft','total_bath','bedroomcnt','total_finished_living_area_sqft','fireplace_count','garagecarcnt',\n",
+        " 'garage_sqft','latitude','longitude','lot_area_sqft','poolcnt','pool_sqft','roomcnt','unitcnt','patio_sqft','storage_sqft',\n",
+        " 'numberofstories','structure_tax','total_parcel_tax','land_tax','total_property_tax_2016','taxdelinquencyyear','transaction_month',\n",
+        " 'census_tractnumber','block_number','age']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "fp53dotszhgA",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "Y=df_train['logerror']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "O0Uaei4rzhj6",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#buildingqualitytypeid ->has order\n",
+        "le = LabelEncoder()\n",
+        "df_train['buildingqualitytypeid']=le.fit_transform(df_train.buildingqualitytypeid)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "g4-g-uvtzhds",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#df_train.ac_id.value_counts()\n",
+        "#df_train.propertylandusetypeid.value_counts()\n",
+        "#'buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid'"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "SzliXafdzhRd",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "X=pd.concat([dummies,df_train[dummies2],df_train[numcols2],df_train[['buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid']]],axis=1)\n",
+        "X.shape"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "DBsZjyQd0W1N",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.10, random_state=3192)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ihXFZWcn0W5D",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#  top features\n",
+        "import xgboost as xgb\n",
+        "xgb_params = {\n",
+        "    'eta': 0.05,\n",
+        "    'max_depth': 8,\n",
+        "    'subsample': 0.7,\n",
+        "    'colsample_bytree': 0.7,\n",
+        "    'objective': 'reg:linear',\n",
+        "    'silent': 1,\n",
+        "    'seed' : 0\n",
+        "}\n",
+        "dtrain = xgb.DMatrix(X_train, Y_train, feature_names=X_train.columns.values)\n",
+        "model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=50)\n",
+        "# plot the important features #\n",
+        "fig, ax = plt.subplots(figsize=(12,18))\n",
+        "#max_num_features=50, error for no reason \n",
+        "xgb.plot_importance(model, height=0.8, ax=ax)\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "TQEEzNkX0W9w",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#top features\n",
+        "xgboost_selection=['total_finished_living_area_sqft','latitude','structure_tax','total_property_tax_2016',\n",
+        "'total_parcel_tax','land_tax','longitude','lot_area_sqft','census_tractnumber','age','total_bath','bedroomcnt',\n",
+        "'block_number','transaction_month','roomcnt','taxdelinquencyyear','unitcnt','taxdelinquencyflag_No',\n",
+        "'fips_LA','garage_sqft','pool_with_spa_tub_no','has_hottub_or_spa_No','garagecarcnt','deck_flag_No',\n",
+        "'poolcnt','pool_sqft'\n",
+        "]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Rr_6EO4G0XEj",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# feature selection\n",
+        "#c_id,heating_system_id,propertylandusetypeid\n",
+        "from sklearn.ensemble import ExtraTreesRegressor\n",
+        "from sklearn.feature_selection import SelectFromModel\n",
+        "reg = ExtraTreesRegressor(n_estimators=500, max_depth=8, max_features='sqrt',\n",
+        "                          min_samples_split=100 ,min_samples_leaf=10, bootstrap=True,n_jobs=-1, random_state=3192)\n",
+        "reg = reg.fit(X_train, Y_train)\n",
+        "#print(\"importance\",reg.feature_importances_) \n",
+        "model = SelectFromModel(reg, prefit=True)\n",
+        "X_new = model.transform(X_train)\n",
+        "print(X_train.shape)\n",
+        "print(X_new.shape)  \n",
+        "\n",
+        "feat_names = X.columns.values\n",
+        "importances = reg.feature_importances_\n",
+        "std = np.std([tree.feature_importances_ for tree in reg.estimators_], axis=0)\n",
+        "indices = np.argsort(importances)[::-1][:26]\n",
+        "plt.figure(figsize=(12,12))\n",
+        "plt.title(\"Feature importances\")\n",
+        "plt.bar(range(len(indices)), importances[indices], color=\"r\", yerr=std[indices], align=\"center\")\n",
+        "plt.xticks(range(len(indices)), feat_names[indices], rotation='vertical')\n",
+        "plt.xlim([-1, len(indices)])\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "i4FCNOG70XIU",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "tree_selection=[\n",
+        "    'total_finished_living_area_sqft','structure_tax','total_property_tax_2016','total_bath','total_parcel_tax',\n",
+        "    'age','latitude','census_tractnumber','bedroomcnt','longitude','land_tax','propertylandusetypeid','block_number',\n",
+        "    'buildingqualitytypeid','numberofstories','heating_system_id','unitcnt','transaction_month','lot_area_sqft','roomcnt',\n",
+        "    'garage_sqft','garagecarcnt','pool_with_spa_tub_no','poolcnt','fips_LA','taxdelinquencyyear','patio_sqft',\n",
+        "    'taxdelinquencyflag_No','taxdelinquencyflag_Yes'\n",
+        "]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "TmIS1WAS0XMW",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.model_selection import KFold\n",
+        "from sklearn.linear_model import Ridge,Lasso\n",
+        "from sklearn.feature_selection import RFECV\n",
+        "from sklearn.linear_model import LinearRegression\n",
+        "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer\n",
+        "\n",
+        "#model=Lasso(alpha=0.2, fit_intercept=True, normalize=True, precompute=False, copy_X=True,\n",
+        " #                                max_iter=1000, \n",
+        "  #                               tol=0.0001, warm_start=False, positive=False, random_state=3192, selection='cyclic')\n",
+        "\n",
+        "#Ridge(random_state=3192,solver='auto',fit_intercept=True,normalize=True,alpha=0.1)\n",
+        "#LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True)\n",
+        "\n",
+        "\n",
+        "rfecv = RFECV(estimator=LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True), step=2, cv=KFold(4),scoring='neg_mean_absolute_error')\n",
+        "rfecv.fit(X_train, Y_train)\n",
+        "\n",
+        "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n",
+        "\n",
+        "# Plot the cross-validation score of each RFECV step (scoring is neg_mean_absolute_error, so higher is better)\n",
+        "plt.figure()\n",
+        "plt.xlabel(\"Number of features selected\")\n",
+        "\n",
+        "plt.ylabel(\"Cross validation score (negative mean absolute error)\")\n",
+        "plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)\n",
+        "plt.show()\n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "DIw8O00U0XPR",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "rfe_selection = [col for col, keep in zip(X.columns, rfecv.support_) if keep]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "gHA0x5_80XWy",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#Linear regression with rfe_selection selection\n",
+        "#rfe_selection, tree_selection, xgboost_selection\n",
+        "from sklearn.linear_model import LinearRegression\n",
+        "from sklearn.model_selection import train_test_split, GridSearchCV\n",
+        "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer,mean_squared_error\n",
+        "\n",
+        "# just to check whether normalized /not normalized data gives better results\n",
+        "parameters = {'fit_intercept':[True], 'normalize':[True,False], 'copy_X':[True]}\n",
+        "scoring = {'MAE':'neg_mean_absolute_error','MSE': make_scorer(mean_squared_error,greater_is_better=False)}\n",
+        "\n",
+        "grid1 = GridSearchCV(LinearRegression(n_jobs=-1),param_grid=parameters, scoring=scoring,cv=5,refit='MAE',\n",
+        "                    return_train_score=True,\n",
+        "                    verbose=0,n_jobs=-1,pre_dispatch='n_jobs')\n",
+        "\n",
+        "grid1.fit(X_train[rfe_selection], Y_train)\n",
+        "#print(\"5. grid best_score_\",abs(grid1.best_score_))\n",
+        "Y_pred = grid1.predict(X_test[rfe_selection])\n",
+        "print(\"MAE on test data\",mean_absolute_error(Y_test,Y_pred))\n",
+        "print(\"MSE on test data\",mean_squared_error(Y_test,Y_pred))\n",
+        "print(\"R Squared data \",r2_score(Y_test,Y_pred))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ekn4pBs60XcT",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#pca selection\n",
+        "from sklearn.decomposition import PCA\n",
+        "from sklearn.preprocessing import scale\n",
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.preprocessing import scale\n",
+        "%matplotlib inline\n",
+        "scaled_x = scale(X)\n",
+        "pca = PCA(n_components=None, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
+        "pca.fit(scaled_x)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "yFuT-wUN0XfV",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# The amount of variance that each PC explains\n",
+        "var= pca.explained_variance_ratio_\n",
+        "#Cumulative Variance explains\n",
+        "var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)\n",
+        "print(var1)\n",
+        "plt.plot(var1)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "iPN4OBUe0XlD",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "#Looking at above plot I'm taking 28 variables\n",
+        "\n",
+        "pca = PCA(n_components=28, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
+        "pca.fit(scaled_x)\n",
+        "\n",
+        "pca1=pca.fit_transform(scaled_x)\n",
+        "\n",
+        "pca = PCA(n_components=28, copy=True, whiten=True, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
+        "pca.fit(scaled_x)\n",
+        "pca2=pca.fit_transform(scaled_x)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EE4ednPC0XjX",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "pcaX_train, pcaX_test, pcaY_train, pcaY_test = train_test_split(pca1, Y, test_size=0.10, random_state=3192)\n",
+        "pca2X_train, pca2X_test, pca2Y_train, pca2Y_test = train_test_split(pca2, Y, test_size=0.10, random_state=3192)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "erYMXvTG0XaK",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "from sklearn.ensemble import GradientBoostingRegressor\n",
+        "from sklearn.metrics import mean_absolute_error,mean_squared_error,make_scorer\n",
+        "from sklearn.model_selection import GridSearchCV\n",
+        "\n",
+        "# Gradient boosting on the PCA components; candidates are scored by negative MAE so abs(best_score_) below is an MAE\n",
+        "\n",
+        " # 0.005 for 1200 trees.\n",
+        "param_grid={'n_estimators':[1200],'max_features':[22]}\n",
+        "\n",
+        "              \n",
+        "grid13 = GridSearchCV(GradientBoostingRegressor(subsample=0.8,min_samples_leaf=50,min_samples_split=50,max_depth=9,loss='ls',criterion='friedman_mse',learning_rate=0.005,random_state=3192),\n",
+        "                     param_grid=param_grid, scoring='neg_mean_absolute_error', cv=5,refit=True,\n",
+        "                    return_train_score=True,\n",
+        "                    verbose=2,n_jobs=-1,pre_dispatch='n_jobs')\n",
+        "\n",
+        "grid13.fit(pcaX_train, pcaY_train)\n",
+        "print(\"5. grid best_score_\",abs(grid13.best_score_))\n",
+        "print(\"best params\",grid13.best_params_)\n",
+        "print(\"best score\",grid13.best_score_)\n",
+        "Y_pred = grid13.predict(pcaX_test)\n",
+        "print(\"MAE on test data\",mean_absolute_error(pcaY_test,Y_pred))\n",
+        "print(\"MSE on test data\",mean_squared_error(pcaY_test,Y_pred))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BgtbLCcR0XUx",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "FjdSCEFP0XCM",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file

From 601d9855ea912d9ee2198f0d3477a44371491ae5 Mon Sep 17 00:00:00 2001
From: Winston <winston@Winstons-MacBook-Pro.local>
Date: Wed, 21 Aug 2019 17:27:26 -0700
Subject: [PATCH 2/7] updated install script; further progress into conversion;
 on final (3/3) section -- current break is on replacing pd.series.filter
 after grouping by buildingtypeid (dropping building types represented 3 or
 fewer times in data)

---
 .../zillow_kaggle_zestimate_comp.ipynb        | 1812 ++++++++++++-----
 1 file changed, 1249 insertions(+), 563 deletions(-)

diff --git a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
index 24a1849f..cda0658e 100644
--- a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
+++ b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
@@ -43,10 +43,10 @@
       "metadata": {
         "id": "W-um5d-x7o46",
         "colab_type": "code",
-        "outputId": "37bf77fb-7f83-49fc-b5e5-514cd049e32d",
+        "outputId": "35d83399-515c-4172-e915-3886511baba2",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 329
+          "height": 302
         }
       },
       "source": [
@@ -61,15 +61,15 @@
         {
           "output_type": "stream",
           "text": [
-            "Thu Aug 15 03:12:33 2019       \n",
+            "Wed Aug 21 22:49:26 2019       \n",
             "+-----------------------------------------------------------------------------+\n",
-            "| NVIDIA-SMI 418.67       Driver Version: 410.79       CUDA Version: 10.0     |\n",
+            "| NVIDIA-SMI 430.40       Driver Version: 410.79       CUDA Version: 10.0     |\n",
             "|-------------------------------+----------------------+----------------------+\n",
             "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
             "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
             "|===============================+======================+======================|\n",
             "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
-            "| N/A   60C    P8    16W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
+            "| N/A   49C    P8    16W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
             "+-------------------------------+----------------------+----------------------+\n",
             "                                                                               \n",
             "+-----------------------------------------------------------------------------+\n",
@@ -98,11 +98,16 @@
       "metadata": {
         "id": "p129YxxnihcV",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "a7de3ee2-b456-45d7-ab54-03eb1d72a956",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        }
       },
       "source": [
-        "!wget -nc https://github.com/rapidsai/notebooks-contrib/blob/master/utils/rapids-colab.sh\n",
-        "!bash rapids-colab.sh\n",
+        "!wget -nc https://raw.githubusercontent.com/randerzander/notebooks-contrib/master/utils/rapids-colab.sh\n",
+        "# RAPIDS 0.9 nightly\n",
+        "!bash rapids-colab.sh 0.9\n",
         "\n",
         "import sys, os\n",
         "\n",
@@ -111,7 +116,256 @@
         "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "--2019-08-21 22:49:32--  https://raw.githubusercontent.com/randerzander/notebooks-contrib/master/utils/rapids-colab.sh\n",
+            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
+            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 1606 (1.6K) [text/plain]\n",
+            "Saving to: ‘rapids-colab.sh’\n",
+            "\n",
+            "\rrapids-colab.sh       0%[                    ]       0  --.-KB/s               \rrapids-colab.sh     100%[===================>]   1.57K  --.-KB/s    in 0s      \n",
+            "\n",
+            "2019-08-21 22:49:33 (231 MB/s) - ‘rapids-colab.sh’ saved [1606/1606]\n",
+            "\n",
+            "--2019-08-21 22:49:33--  https://github.com/rapidsai/notebooks-extended/raw/master/utils/env-check.py\n",
+            "Resolving github.com (github.com)... 140.82.113.3\n",
+            "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n",
+            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
+            "Location: https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py [following]\n",
+            "--2019-08-21 22:49:33--  https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py\n",
+            "Reusing existing connection to github.com:443.\n",
+            "HTTP request sent, awaiting response... 302 Found\n",
+            "Location: https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py [following]\n",
+            "--2019-08-21 22:49:33--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py\n",
+            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
+            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 783 [text/plain]\n",
+            "Saving to: ‘env-check.py’\n",
+            "\n",
+            "env-check.py        100%[===================>]     783  --.-KB/s    in 0s      \n",
+            "\n",
+            "2019-08-21 22:49:33 (125 MB/s) - ‘env-check.py’ saved [783/783]\n",
+            "\n",
+            "Checking for GPU type:\n",
+            "*********************************************\n",
+            "Woo! Your instance has the right kind of GPU!\n",
+            "*********************************************\n",
+            "\n",
+            "Removing conflicting packages, will replace with RAPIDS compatible versions\n",
+            "Uninstalling xgboost-0.90:\n",
+            "  Successfully uninstalled xgboost-0.90\n",
+            "Uninstalling dask-1.1.5:\n",
+            "  Successfully uninstalled dask-1.1.5\n",
+            "Uninstalling distributed-1.25.3:\n",
+            "  Successfully uninstalled distributed-1.25.3\n",
+            "Installing conda\n",
+            "--2019-08-21 22:49:38--  https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
+            "Resolving repo.continuum.io (repo.continuum.io)... 104.18.200.79, 104.18.201.79, 2606:4700::6812:c94f, ...\n",
+            "Connecting to repo.continuum.io (repo.continuum.io)|104.18.200.79|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 58468498 (56M) [application/x-sh]\n",
+            "Saving to: ‘Miniconda3-4.5.4-Linux-x86_64.sh’\n",
+            "\n",
+            "Miniconda3-4.5.4-Li 100%[===================>]  55.76M   151MB/s    in 0.4s    \n",
+            "\n",
+            "2019-08-21 22:49:38 (151 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]\n",
+            "\n",
+            "PREFIX=/usr/local\n",
+            "installing: python-3.6.5-hc3d631a_2 ...\n",
+            "Python 3.6.5 :: Anaconda, Inc.\n",
+            "installing: ca-certificates-2018.03.07-0 ...\n",
+            "installing: conda-env-2.6.0-h36134e3_1 ...\n",
+            "installing: libgcc-ng-7.2.0-hdf63c60_3 ...\n",
+            "installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...\n",
+            "installing: libffi-3.2.1-hd88cf55_4 ...\n",
+            "installing: ncurses-6.1-hf484d3e_0 ...\n",
+            "installing: openssl-1.0.2o-h20670df_0 ...\n",
+            "installing: tk-8.6.7-hc745277_3 ...\n",
+            "installing: xz-5.2.4-h14c3975_4 ...\n",
+            "installing: yaml-0.1.7-had09818_2 ...\n",
+            "installing: zlib-1.2.11-ha838bed_2 ...\n",
+            "installing: libedit-3.1.20170329-h6b74fdf_2 ...\n",
+            "installing: readline-7.0-ha6073c6_4 ...\n",
+            "installing: sqlite-3.23.1-he433501_0 ...\n",
+            "installing: asn1crypto-0.24.0-py36_0 ...\n",
+            "installing: certifi-2018.4.16-py36_0 ...\n",
+            "installing: chardet-3.0.4-py36h0f667ec_1 ...\n",
+            "installing: idna-2.6-py36h82fb2a8_1 ...\n",
+            "installing: pycosat-0.6.3-py36h0a5515d_0 ...\n",
+            "installing: pycparser-2.18-py36hf9f622e_1 ...\n",
+            "installing: pysocks-1.6.8-py36_0 ...\n",
+            "installing: ruamel_yaml-0.15.37-py36h14c3975_2 ...\n",
+            "installing: six-1.11.0-py36h372c433_1 ...\n",
+            "installing: cffi-1.11.5-py36h9745a5d_0 ...\n",
+            "installing: setuptools-39.2.0-py36_0 ...\n",
+            "installing: cryptography-2.2.2-py36h14c3975_0 ...\n",
+            "installing: wheel-0.31.1-py36_0 ...\n",
+            "installing: pip-10.0.1-py36_0 ...\n",
+            "installing: pyopenssl-18.0.0-py36_0 ...\n",
+            "installing: urllib3-1.22-py36hbe7ace6_0 ...\n",
+            "installing: requests-2.18.4-py36he2e5f8d_1 ...\n",
+            "installing: conda-4.5.4-py36_0 ...\n",
+            "installation finished.\n",
+            "WARNING:\n",
+            "    You currently have a PYTHONPATH environment variable set. This may cause\n",
+            "    unexpected behavior when running the Python interpreter in Miniconda3.\n",
+            "    For best results, please verify that your PYTHONPATH only points to\n",
+            "    directories of packages that are compatible with the Python interpreter\n",
+            "    in Miniconda3: /usr/local\n",
+            "Installing RAPIDS packages\n",
+            "Please standby, this will take a few minutes...\n",
+            "\n",
+            "\n",
+            "==> WARNING: A newer version of conda exists. <==\n",
+            "  current version: 4.5.4\n",
+            "  latest version: 4.7.11\n",
+            "\n",
+            "Please update conda by running\n",
+            "\n",
+            "    $ conda update -n base conda\n",
+            "\n",
+            "\n",
+            "bzip2-1.0.8          |  396 KB | : 100% 1.0/1 [00:00<00:00,  6.99it/s]              \n",
+            "requests-2.22.0      |   84 KB | : 100% 1.0/1 [00:00<00:00,  6.56it/s]                \n",
+            "olefile-0.46         |   31 KB | : 100% 1.0/1 [00:00<00:00, 23.03it/s]\n",
+            "yaml-0.1.7           |   78 KB | : 100% 1.0/1 [00:00<00:00, 16.84it/s]\n",
+            "zlib-1.2.11          |  105 KB | : 100% 1.0/1 [00:00<00:00, 15.03it/s]\n",
+            "llvmlite-0.29.0      | 19.9 MB | : 100% 1.0/1 [00:03<00:00,  3.64s/it]               \n",
+            "pyopenssl-19.0.0     |   81 KB | : 100% 1.0/1 [00:00<00:00, 16.66it/s]\n",
+            "thrift-cpp-0.12.0    |  2.4 MB | : 100% 1.0/1 [00:00<00:00,  1.76it/s]              \n",
+            "toolz-0.10.0         |   46 KB | : 100% 1.0/1 [00:00<00:00, 17.97it/s]\n",
+            "libevent-2.1.10      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  2.23it/s]               \n",
+            "libffi-3.2.1         |   46 KB | : 100% 1.0/1 [00:00<00:00, 18.49it/s]\n",
+            "cudf-0.10.0a         |  4.8 MB | : 100% 1.0/1 [00:01<00:00,  1.50s/it]               \n",
+            "snappy-1.1.7         |   39 KB | : 100% 1.0/1 [00:00<00:00, 19.74it/s]\n",
+            "cloudpickle-1.2.1    |   22 KB | : 100% 1.0/1 [00:00<00:00, 17.29it/s]\n",
+            "re2-2019.08.01       |  420 KB | : 100% 1.0/1 [00:00<00:00,  6.36it/s]               \n",
+            "pyjwt-1.7.1          |   17 KB | : 100% 1.0/1 [00:00<00:00, 23.11it/s]\n",
+            "libstdcxx-ng-9.1.0   |  4.0 MB | : 100% 1.0/1 [00:00<00:00,  1.44it/s]               \n",
+            "libgfortran-ng-7.3.0 |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  3.53it/s]               \n",
+            "cython-0.29.13       |  2.2 MB | : 100% 1.0/1 [00:00<00:00,  1.72it/s]               \n",
+            "pyparsing-2.4.2      |   57 KB | : 100% 1.0/1 [00:00<00:00, 19.30it/s]\n",
+            "chardet-3.0.4        |  190 KB | : 100% 1.0/1 [00:00<00:00,  9.45it/s]\n",
+            "rsa-3.4.2            |   31 KB | : 100% 1.0/1 [00:00<00:00, 19.23it/s]\n",
+            "libxgboost-0.90.rapi | 33.2 MB | : 100% 1.0/1 [00:08<00:00,  8.58s/it]               \n",
+            "pyasn1-modules-0.2.6 |   47 KB | : 100% 1.0/1 [00:00<00:00, 12.11it/s]\n",
+            "lz4-c-1.8.3          |  187 KB | : 100% 1.0/1 [00:00<00:00, 12.18it/s]\n",
+            "freetype-2.10.0      |  884 KB | : 100% 1.0/1 [00:00<00:00,  4.76it/s]               \n",
+            "arrow-cpp-0.14.1     | 17.3 MB | : 100% 1.0/1 [00:03<00:00,  3.36s/it]               \n",
+            "oauthlib-3.0.1       |   82 KB | : 100% 1.0/1 [00:00<00:00, 12.63it/s]\n",
+            "libcumlprims-0.9.0   |  3.9 MB | : 100% 1.0/1 [00:01<00:00,  1.55s/it]               \n",
+            "libcugraph-0.10.0a   | 11.2 MB | : 100% 1.0/1 [00:02<00:00,  2.33s/it]               \n",
+            "dask-cuml-0.8.0a     |   30 KB | : 100% 1.0/1 [00:00<00:00,  3.87it/s]                \n",
+            "fastavro-0.22.3      |  408 KB | : 100% 1.0/1 [00:00<00:00,  6.77it/s]               \n",
+            "scipy-1.3.1          | 18.1 MB | : 100% 1.0/1 [00:03<00:00,  3.52s/it]               \n",
+            "certifi-2019.6.16    |  149 KB | : 100% 1.0/1 [00:00<00:00, 15.17it/s]\n",
+            "decorator-4.4.0      |   11 KB | : 100% 1.0/1 [00:00<00:00, 20.06it/s]\n",
+            "google-auth-1.6.3    |   45 KB | : 100% 1.0/1 [00:00<00:00, 16.56it/s]\n",
+            "parquet-cpp-1.5.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 27.43it/s]\n",
+            "rmm-0.10.0a          |   14 KB | : 100% 1.0/1 [00:00<00:00,  3.98it/s] \n",
+            "glog-0.4.0           |  104 KB | : 100% 1.0/1 [00:00<00:00, 15.00it/s]\n",
+            "wheel-0.33.6         |   35 KB | : 100% 1.0/1 [00:00<00:00, 17.29it/s]\n",
+            "bokeh-1.3.4          |  4.0 MB | : 100% 1.0/1 [00:01<00:00,  1.56s/it]              \n",
+            "scikit-learn-0.21.3  |  6.7 MB | : 100% 1.0/1 [00:01<00:00,  1.60s/it]               \n",
+            "libtiff-4.0.10       |  587 KB | : 100% 1.0/1 [00:00<00:00,  6.63it/s]               \n",
+            "idna-2.8             |  132 KB | : 100% 1.0/1 [00:00<00:00, 15.63it/s]\n",
+            "pillow-6.1.0         |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.86it/s]               \n",
+            "_libgcc_mutex-0.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 43.53it/s]\n",
+            "nccl-2.4.6.1         | 66.6 MB | : 100% 1.0/1 [00:10<00:00, 10.59s/it]              \n",
+            "pyyaml-5.1.2         |  184 KB | : 100% 1.0/1 [00:00<00:00, 10.61it/s]\n",
+            "blinker-1.4          |   13 KB | : 100% 1.0/1 [00:00<00:00, 20.08it/s]\n",
+            "librmm-0.10.0a       |   44 KB | : 100% 1.0/1 [00:00<00:00,  3.31it/s]               \n",
+            "sortedcontainers-2.1 |   25 KB | : 100% 1.0/1 [00:00<00:00, 14.67it/s]\n",
+            "cytoolz-0.10.0       |  429 KB | : 100% 1.0/1 [00:00<00:00,  7.83it/s]               \n",
+            "dask-cuda-0.10.0a    |  911 KB | : 100% 1.0/1 [00:00<00:00,  1.66it/s]               \n",
+            "libblas-3.8.0        |   10 KB | : 100% 1.0/1 [00:00<00:00,  5.23it/s] \n",
+            "distributed-2.3.0    |  366 KB | : 100% 1.0/1 [00:00<00:00,  5.36it/s]               \n",
+            "libpng-1.6.37        |  343 KB | : 100% 1.0/1 [00:00<00:00,  8.59it/s]               \n",
+            "jinja2-2.10.1        |   91 KB | : 100% 1.0/1 [00:00<00:00, 15.90it/s]\n",
+            "msgpack-python-0.6.1 |   89 KB | : 100% 1.0/1 [00:00<00:00, 17.11it/s]\n",
+            "numpy-1.17.0         |  5.2 MB | : 100% 1.0/1 [00:01<00:00,  1.30s/it]               \n",
+            "gflags-2.2.2         |  177 KB | : 100% 1.0/1 [00:00<00:00, 11.98it/s]\n",
+            "tk-8.6.9             |  3.2 MB | : 100% 1.0/1 [00:00<00:00,  1.35it/s]               \n",
+            "ca-certificates-2019 |  145 KB | : 100% 1.0/1 [00:00<00:00, 15.40it/s]\n",
+            "cffi-1.12.3          |  218 KB | : 100% 1.0/1 [00:00<00:00, 11.34it/s]\n",
+            "asn1crypto-0.24.0    |  154 KB | : 100% 1.0/1 [00:00<00:00, 11.99it/s]\n",
+            "dlpack-0.2           |   12 KB | : 100% 1.0/1 [00:00<00:00, 24.28it/s]\n",
+            "boost-cpp-1.70.0     | 21.1 MB | : 100% 1.0/1 [00:09<00:00,  9.52s/it]               \n",
+            "pyarrow-0.14.1       |  2.8 MB | : 100% 1.0/1 [00:00<00:00,  1.14it/s]              \n",
+            "markupsafe-1.1.1     |   26 KB | : 100% 1.0/1 [00:00<00:00, 21.22it/s]\n",
+            "six-1.12.0           |   22 KB | : 100% 1.0/1 [00:00<00:00, 17.89it/s]\n",
+            "python-3.6.7         | 34.6 MB | : 100% 1.0/1 [00:05<00:00,  5.94s/it]               \n",
+            "icu-64.2             | 12.6 MB | : 100% 1.0/1 [00:02<00:00,  2.19s/it]               \n",
+            "libopenblas-0.3.7    |  7.6 MB | : 100% 1.0/1 [00:01<00:00,  1.52s/it]               \n",
+            "c-ares-1.15.0        |  100 KB | : 100% 1.0/1 [00:00<00:00, 17.03it/s]\n",
+            "numba-0.45.1         |  3.1 MB | : 100% 1.0/1 [00:00<00:00,  1.00it/s]               \n",
+            "zstd-1.4.0           |  928 KB | : 100% 1.0/1 [00:00<00:00,  5.27it/s]               \n",
+            "pycparser-2.19       |  173 KB | : 100% 1.0/1 [00:00<00:00, 11.22it/s]\n",
+            "openssl-1.1.1c       |  2.1 MB | : 100% 1.0/1 [00:00<00:00,  2.22it/s]               \n",
+            "dask-cudf-0.10.0a    |   63 KB | : 100% 1.0/1 [00:00<00:00,  2.84it/s]                \n",
+            "sqlite-3.29.0        |  2.0 MB | : 100% 1.0/1 [00:00<00:00,  2.75it/s]               \n",
+            "readline-8.0         |  441 KB | : 100% 1.0/1 [00:00<00:00,  7.41it/s]               \n",
+            "tblib-1.4.0          |   12 KB | : 100% 1.0/1 [00:00<00:00, 25.51it/s]\n",
+            "locket-0.2.0         |    6 KB | : 100% 1.0/1 [00:00<00:00, 29.95it/s]\n",
+            "pyasn1-0.4.6         |   52 KB | : 100% 1.0/1 [00:00<00:00, 15.07it/s]\n",
+            "pytz-2019.2          |  228 KB | : 100% 1.0/1 [00:00<00:00,  4.22it/s]              \n",
+            "libcudf-0.10.0a      | 26.0 MB | : 100% 1.0/1 [00:05<00:00,  5.98s/it]               \n",
+            "double-conversion-3. |   85 KB | : 100% 1.0/1 [00:00<00:00, 15.44it/s]\n",
+            "fsspec-0.4.1         |   39 KB | : 100% 1.0/1 [00:00<00:00, 19.96it/s]\n",
+            "uriparser-0.9.3      |   49 KB | : 100% 1.0/1 [00:00<00:00, 19.50it/s]\n",
+            "requests-oauthlib-1. |   19 KB | : 100% 1.0/1 [00:00<00:00, 19.66it/s]\n",
+            "cryptography-2.7     |  607 KB | : 100% 1.0/1 [00:00<00:00,  3.52it/s]               \n",
+            "cachetools-2.1.0     |   10 KB | : 100% 1.0/1 [00:00<00:00, 24.47it/s]\n",
+            "ncurses-6.1          |  1.3 MB | : 100% 1.0/1 [00:01<00:00,  1.02s/it]               \n",
+            "gcsfs-0.3.0          |   19 KB | : 100% 1.0/1 [00:00<00:00, 15.81it/s]\n",
+            "libnvstrings-0.10.0a | 16.8 MB | : 100% 1.0/1 [00:07<00:00,  7.28s/it]               \n",
+            "cudatoolkit-10.0.130 | 380.0 MB | : 100% 1.0/1 [00:56<00:00, 57.00s/it]                \n",
+            "pip-19.2.2           |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  1.62it/s]               \n",
+            "liblapack-3.8.0      |   10 KB | : 100% 1.0/1 [00:00<00:00, 18.78it/s]\n",
+            "click-7.0            |   61 KB | : 100% 1.0/1 [00:00<00:00, 18.70it/s]\n",
+            "cuml-0.10.0a         |  6.0 MB | : 100% 1.0/1 [00:01<00:00,  1.69s/it]              \n",
+            "grpc-cpp-1.23.0      |  4.5 MB | : 100% 1.0/1 [00:01<00:00,  1.10s/it]              \n",
+            "dask-2.3.0           |    4 KB | : 100% 1.0/1 [00:00<00:00, 27.57it/s]\n",
+            "brotli-1.0.7         |  1.0 MB | : 100% 1.0/1 [00:00<00:00,  5.00it/s]               \n",
+            "nvstrings-0.10.0a    |  124 KB | : 100% 1.0/1 [00:00<00:00,  3.47it/s]                \n",
+            "tornado-6.0.3        |  636 KB | : 100% 1.0/1 [00:00<00:00,  4.58it/s]             \n",
+            "pynvml-8.0.2         |   30 KB | : 100% 1.0/1 [00:00<00:00, 21.55it/s]\n",
+            "libgcc-ng-9.1.0      |  8.1 MB | : 100% 1.0/1 [00:01<00:00,  1.40s/it]               \n",
+            "libcblas-3.8.0       |   10 KB | : 100% 1.0/1 [00:00<00:00, 22.83it/s]\n",
+            "joblib-0.13.2        |  180 KB | : 100% 1.0/1 [00:00<00:00,  8.76it/s]\n",
+            "pandas-0.24.2        | 11.1 MB | : 100% 1.0/1 [00:02<00:00,  2.68s/it]               \n",
+            "psutil-5.6.3         |  322 KB | : 100% 1.0/1 [00:00<00:00,  7.88it/s]               \n",
+            "heapdict-1.0.0       |    7 KB | : 100% 1.0/1 [00:00<00:00, 21.63it/s]\n",
+            "jpeg-9c              |  251 KB | : 100% 1.0/1 [00:00<00:00, 10.08it/s]\n",
+            "zict-1.0.0           |   10 KB | : 100% 1.0/1 [00:00<00:00, 20.76it/s]\n",
+            "libprotobuf-3.8.0    |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.06s/it]               \n",
+            "packaging-19.0       |   23 KB | : 100% 1.0/1 [00:00<00:00, 20.95it/s]\n",
+            "xgboost-0.90.rapidsd |   12 KB | : 100% 1.0/1 [00:00<00:00,  2.77it/s] \n",
+            "cugraph-0.10.0a      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  1.74it/s]              \n",
+            "urllib3-1.25.3       |  187 KB | : 100% 1.0/1 [00:00<00:00,  9.23it/s]\n",
+            "py-xgboost-0.90.rapi |   87 KB | : 100% 1.0/1 [00:00<00:00,  3.59it/s]                \n",
+            "dask-core-2.3.0      |  574 KB | : 100% 1.0/1 [00:00<00:00,  4.29it/s]              \n",
+            "setuptools-41.2.0    |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.25it/s]               \n",
+            "pysocks-1.7.0        |   26 KB | : 100% 1.0/1 [00:00<00:00, 21.18it/s]\n",
+            "libcuml-0.10.0a      | 29.7 MB | : 100% 1.0/1 [00:07<00:00,  7.44s/it]                \n",
+            "partd-1.0.0          |   16 KB | : 100% 1.0/1 [00:00<00:00, 21.76it/s]\n",
+            "google-auth-oauthlib |   18 KB | : 100% 1.0/1 [00:00<00:00, 23.67it/s]\n",
+            "python-dateutil-2.8. |  219 KB | : 100% 1.0/1 [00:00<00:00, 11.17it/s]\n",
+            "xz-5.2.4             |  366 KB | : 100% 1.0/1 [00:00<00:00,  7.94it/s]               \n",
+            "Copying shared object files to /usr/lib\n",
+            "\n",
+            "*********************************************\n",
+            "Your Google Colab instance is RAPIDS ready!\n",
+            "*********************************************\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -132,7 +386,11 @@
       "metadata": {
         "id": "x1dLRTm168Tk",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "e4ee4a4e-64f3-4e87-8b87-472b02f84325",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 958
+        }
       },
       "source": [
         "# Info on how to get your api key (kaggle.json) here: https://github.com/Kaggle/kaggle-api#api-credentials\n",
@@ -152,7 +410,86 @@
         "!unzip -q \"/content/properties_2017.csv.zip\""
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Collecting kaggle\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e9/fc/0de659ea1f2096563204925b6660ae141f3d85bbe9e8a1571c3eb6cc1fdd/kaggle-1.5.5.tar.gz (56kB)\n",
+            "\u001b[K     |████████████████████████████████| 61kB 2.9MB/s \n",
+            "\u001b[?25hCollecting urllib3<1.25,>=1.21.1 (from kaggle)\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/01/11/525b02e4acc0c747de8b6ccdab376331597c569c42ea66ab0a1dbd36eca2/urllib3-1.24.3-py2.py3-none-any.whl (118kB)\n",
+            "\u001b[K     |████████████████████████████████| 122kB 9.7MB/s \n",
+            "\u001b[?25hRequirement already satisfied: six>=1.10 in /usr/local/lib/python3.6/site-packages (from kaggle) (1.12.0)\n",
+            "Requirement already satisfied: certifi in /usr/local/lib/python3.6/site-packages (from kaggle) (2019.6.16)\n",
+            "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/site-packages (from kaggle) (2.8.0)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.6/site-packages (from kaggle) (2.22.0)\n",
+            "Collecting tqdm (from kaggle)\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/a5/83/06029af22fe06b8a7be013aeae5e104b3ed26867e5d4ca91408b30aa602e/tqdm-4.34.0-py2.py3-none-any.whl (50kB)\n",
+            "\u001b[K     |████████████████████████████████| 51kB 12.9MB/s \n",
+            "\u001b[?25hCollecting python-slugify (from kaggle)\n",
+            "  Downloading https://files.pythonhosted.org/packages/a2/5d/bd30413c00bbed3945558aca07c55944073e1e30abeee1f06515281f9811/python-slugify-3.0.3.tar.gz\n",
+            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (2.8)\n",
+            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (3.0.4)\n",
+            "Collecting text-unidecode==1.2 (from python-slugify->kaggle)\n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/79/42/d717cc2b4520fb09e45b344b1b0b4e81aa672001dd128c180fabc655c341/text_unidecode-1.2-py2.py3-none-any.whl (77kB)\n",
+            "\u001b[K     |████████████████████████████████| 81kB 28.8MB/s \n",
+            "\u001b[?25hBuilding wheels for collected packages: kaggle, python-slugify\n",
+            "  Building wheel for kaggle (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for kaggle: filename=kaggle-1.5.5-cp36-none-any.whl size=71896 sha256=ee79b8c43069539b819caedf251aae4360d5dd43aec6a5bc2734275442177e60\n",
+            "  Stored in directory: /root/.cache/pip/wheels/db/6a/80/6cd1892eb9b9b136333db3c74e16cba4e17e2c700f51541f06\n",
+            "  Building wheel for python-slugify (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for python-slugify: filename=python_slugify-3.0.3-py2.py3-none-any.whl size=4789 sha256=a8f8df8b4a56a8db4fc841f6b6ff5f89a9a3c7e641ff4fc8c41d5e7a5c1ec087\n",
+            "  Stored in directory: /root/.cache/pip/wheels/0f/96/ca/85f5b01165975402d1e37f8dd346df00dc39be1d0761bd17bb\n",
+            "Successfully built kaggle python-slugify\n",
+            "Installing collected packages: urllib3, tqdm, text-unidecode, python-slugify, kaggle\n",
+            "  Found existing installation: urllib3 1.25.3\n",
+            "    Uninstalling urllib3-1.25.3:\n",
+            "      Successfully uninstalled urllib3-1.25.3\n",
+            "Successfully installed kaggle-1.5.5 python-slugify-3.0.3 text-unidecode-1.2 tqdm-4.34.0 urllib3-1.24.3\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.colab-display-data+json": {
+              "pip_warning": {
+                "packages": [
+                  "urllib3"
+                ]
+              }
+            }
+          },
+          "metadata": {
+            "tags": []
+          }
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "Downloading sample_submission.csv.zip to /content\n",
+            " 91% 9.00M/9.86M [00:00<00:00, 17.1MB/s]\n",
+            "100% 9.86M/9.86M [00:00<00:00, 22.0MB/s]\n",
+            "Downloading properties_2016.csv.zip to /content\n",
+            " 98% 156M/159M [00:01<00:00, 103MB/s] \n",
+            "100% 159M/159M [00:01<00:00, 92.1MB/s]\n",
+            "Downloading zillow_data_dictionary.xlsx.zip to /content\n",
+            "  0% 0.00/15.7k [00:00<?, ?B/s]\n",
+            "100% 15.7k/15.7k [00:00<00:00, 21.5MB/s]\n",
+            "Downloading train_2016_v2.csv.zip to /content\n",
+            "  0% 0.00/632k [00:00<?, ?B/s]\n",
+            "100% 632k/632k [00:00<00:00, 164MB/s]\n",
+            "Downloading train_2017.csv.zip to /content\n",
+            "  0% 0.00/825k [00:00<?, ?B/s]\n",
+            "100% 825k/825k [00:00<00:00, 198MB/s]\n",
+            "Downloading properties_2017.csv.zip to /content\n",
+            " 93% 129M/138M [00:01<00:00, 70.7MB/s]\n",
+            "100% 138M/138M [00:01<00:00, 87.7MB/s]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -170,10 +507,10 @@
       "metadata": {
         "id": "6n75DyJ-dm4B",
         "colab_type": "code",
-        "outputId": "fbd949ae-aa45-4c67-c6e2-74553239623e",
+        "outputId": "b0cc7ddd-7667-475b-ca2f-d3ae6b580331",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 182
+          "height": 168
         }
       },
       "source": [
@@ -212,23 +549,24 @@
         "  - words here\n",
         "* `cuML`\n",
         "  - words here\n",
-        "* `cuPy`\n",
-        "  - words here\n",
         "\n"
       ]
     },
     {
       "cell_type": "code",
       "metadata": {
+        "id": "ZKN5zuROroJD",
         "colab_type": "code",
-        "id": "_Tvf2biLAA9r",
         "colab": {}
       },
       "source": [
-        "# rapids imports\n",
-        "import cudf, cuml, cupy\n",
-        "# general imports \n",
-        "import io, requests  "
+        "# RAPIDS libraries\n",
+        "import cudf, cuml \n",
+        "# TODO: switch from numpy to cupy in the next update (once the docker image includes it)\n",
+        "import numpy as np\n",
+        "# plotting\n",
+        "import seaborn as sns\n",
+        "import matplotlib.pyplot as plt"
       ],
       "execution_count": 0,
       "outputs": []
@@ -254,10 +592,10 @@
       "metadata": {
         "id": "uynoUxpx8Xsn",
         "colab_type": "code",
-        "outputId": "545d3b69-741a-4f23-86df-62ec7f19fb7d",
+        "outputId": "80e4f89a-4c16-41a2-dffe-2db4a7dddbd8",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 227
+          "height": 118
         }
       },
       "source": [
@@ -270,15 +608,16 @@
       "execution_count": 0,
       "outputs": [
         {
-          "output_type": "error",
-          "ename": "TypeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-6-ff87c45bf2d8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n\u001b[0;32m----> 2\u001b[0;31m                           parse_dates=[\"transactiondate\"])\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;31m# peek display 2016 train\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain2016\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mTypeError\u001b[0m: read_csv() got an unexpected keyword argument 'parse_dates'"
-          ]
+          "output_type": "stream",
+          "text": [
+            "   parcelid  logerror transactiondate\n",
+            "0  11016594    0.0276      2016-01-01\n",
+            "1  14366692   -0.1684      2016-01-01\n",
+            "2  12098116   -0.0040      2016-01-01\n",
+            "3  12643413    0.0218      2016-01-02\n",
+            "4  14432541   -0.0050      2016-01-02\n"
+          ],
+          "name": "stdout"
         }
       ]
     },
@@ -287,10 +626,10 @@
       "metadata": {
         "id": "2EfApIzCfEtr",
         "colab_type": "code",
-        "outputId": "eabb1351-f4f9-499c-9aea-2fa2953c11a7",
+        "outputId": "7e91f5f7-7b76-410a-b700-0380b29bd982",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 146
+          "height": 151
         }
       },
       "source": [
@@ -304,13 +643,14 @@
         {
           "output_type": "stream",
           "text": [
-            "   parcelid  airconditioningtypeid  architecturalstyletypeid  basementsqft  bathroomcnt  bedroomcnt  buildingclasstypeid ...  censustractandblock\n",
-            "0  10754147                                                                         0.0         0.0                      ...                     \n",
-            "1  10759547                                                                         0.0         0.0                      ...                     \n",
-            "2  10843547                                                                         0.0         0.0                      ...                     \n",
-            "3  10859147                                                                         0.0         0.0                  3.0 ...                     \n",
-            "4  10879947                                                                         0.0         0.0                  4.0 ...                     \n",
-            "[50 more columns]\n"
+            "   parcelid airconditioningtypeid  ... taxdelinquencyyear censustractandblock\n",
+            "0  10754147                  null  ...               null                null\n",
+            "1  10759547                  null  ...               null                null\n",
+            "2  10843547                  null  ...               null                null\n",
+            "3  10859147                  null  ...               null                null\n",
+            "4  10879947                  null  ...               null                null\n",
+            "\n",
+            "[5 rows x 58 columns]\n"
           ],
           "name": "stdout"
         }
@@ -339,10 +679,10 @@
       "metadata": {
         "id": "o4CvSIcwm4B2",
         "colab_type": "code",
-        "outputId": "6db5ec53-8522-4483-e2fa-d79d9d9d75e8",
+        "outputId": "327cc4dd-bad3-40f2-9d09-41105b532abb",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 146
+          "height": 151
         }
       },
       "source": [
@@ -398,13 +738,14 @@
         {
           "output_type": "stream",
           "text": [
-            "   parcelid             logerror         transactiondate  ac_id  architecturalstyletypeid  basement_sqft  total_bath ...  transaction_month\n",
-            "0  11827818               0.0402 2016-03-15T00:00:00.000                                                         4.0 ...                  3\n",
-            "1  12123024               0.0296 2016-03-15T00:00:00.000                                                         3.0 ...                  3\n",
-            "2  13867327               0.0344 2016-03-15T00:00:00.000                                                         2.0 ...                  3\n",
-            "3  12681894                0.006 2016-03-15T00:00:00.000                                                         3.0 ...                  3\n",
-            "4  12848541  0.06949999999999999 2016-03-15T00:00:00.000    1.0                                                  4.0 ...                  3\n",
-            "[53 more columns]\n"
+            "   parcelid  logerror  ... censustractandblock transaction_month\n",
+            "0  11827818    0.0402  ...        6.037532e+13                 3\n",
+            "1  12123024    0.0296  ...        6.037463e+13                 3\n",
+            "2  13867327    0.0344  ...        6.059011e+13                 3\n",
+            "3  12681894    0.0060  ...        6.037651e+13                 3\n",
+            "4  12848541    0.0695  ...        6.037409e+13                 3\n",
+            "\n",
+            "[5 rows x 61 columns]\n"
           ],
           "name": "stdout"
         }
@@ -463,35 +804,32 @@
       "metadata": {
         "id": "B3-1V93smA9A",
         "colab_type": "code",
-        "outputId": "66d7335e-bc42-4108-a1c1-80f1afb06a4b",
+        "outputId": "28a73c5c-abf2-4325-a575-b654c9ddd9f4",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 380
+          "height": 67
         }
       },
       "source": [
-        "# when poolcnt=1 and has_hottub_or_spa=1 and just_hottub_or_spa is null, then just_hottub_or_spa =0\n",
+        "# if poolcnt=1 and has_hottub_or_spa=1 and just_hottub_or_spa is null\n",
         "conditions = ((df_train['pool_count'] == 1) \n",
         "              & (df_train['has_hottub_or_spa'] == 1) \n",
         "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
-        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(0, conditions) "
+        "# then just_hottub_or_spa = 0\n",
+        "df_train.just_hottub_or_spa.loc[conditions] = 0\n",
+        "\n",
+        "print(df_train.just_hottub_or_spa.value_counts())"
       ],
       "execution_count": 0,
       "outputs": [
         {
-          "output_type": "error",
-          "ename": "TypeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-27-10369f477b6c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m conditions = ((df_train['pool_count'] == 1) \n\u001b[1;32m      2\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m               & (df_train['just_hottub_or_spa'].isna() == True))\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'just_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'just_hottub_or_spa'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconditions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    811\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    812\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__eq__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 813\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'eq'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    814\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    815\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mequals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m_unordered_compare\u001b[0;34m(self, other, cmpops)\u001b[0m\n\u001b[1;32m    781\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmpops\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    782\u001b[0m         \u001b[0mnvtx_range_push\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CUDF_UNORDERED_COMP\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"orange\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 783\u001b[0;31m         \u001b[0mother\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_normalize_binop_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    784\u001b[0m         \u001b[0moutcol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmpops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    785\u001b[0m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_copy_construct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutcol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m_normalize_binop_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    777\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    778\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 779\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormalize_binop_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    781\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_unordered_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmpops\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mnormalize_binop_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    703\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    704\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 705\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'cannot broadcast {}'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    706\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    707\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdefault_na_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mTypeError\u001b[0m: cannot broadcast <class 'int'>"
-          ]
+          "output_type": "stream",
+          "text": [
+            "0.0    1204\n",
+            "1.0    1161\n",
+            "Name: just_hottub_or_spa, dtype: int32\n"
+          ],
+          "name": "stdout"
         }
       ]
     },
@@ -502,6 +840,7 @@
         "colab_type": "text"
       },
       "source": [
+        "\n",
         "- when `has_hottub_or_spa` is null and `just_hottub_or_spa` is null\n",
         "  - both should be zero\n"
       ]
@@ -518,13 +857,13 @@
         "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
         "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
         "# just hottub or spa = 0 \n",
-        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(0, conditions) \n",
+        "df_train.just_hottub_or_spa.loc[conditions] = 0\n",
         "\n",
         "# now, if has hottub is null and just hottub is 0 \n",
         "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
         "              & (df_train['just_hottub_or_spa'] == 0))\n",
         "# has hottub or spa = 0 \n",
-        "df_train['has_hottub_or_spa'] = df_train['has_hottub_or_spa'].masked_assign(0, conditions) "
+        "df_train.has_hottub_or_spa.loc[conditions] = 0"
       ],
       "execution_count": 0,
       "outputs": []
@@ -546,17 +885,33 @@
       "metadata": {
         "id": "FBgs7zJm3qk-",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3c3935ec-9d5e-4806-c701-1191f563ccdd",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 67
+        }
       },
       "source": [
         "# when poolcnt=0, has_hottub_or_spa=1\n",
         "conditions = ((df_train['pool_count'] == 0) \n",
         "              & (df_train['has_hottub_or_spa'] == 1))\n",
         "# just_hottub_or_spa=1\n",
-        "df_train['just_hottub_or_spa'] = df_train['just_hottub_or_spa'].masked_assign(1, conditions) \n"
+        "df_train.just_hottub_or_spa.loc[conditions] = 1\n",
+        "\n",
+        "print(df_train.just_hottub_or_spa.value_counts())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0.0    89114\n",
+            "1.0     1161\n",
+            "Name: just_hottub_or_spa, dtype: int32\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -579,7 +934,7 @@
         "# where there is no pool\n",
         "conditions = df_train['pool_count']==0\n",
         "# square footage of non existant pool is 0 \n",
-        "df_train['pool_sqft'] = df_train['pool_sqft'].masked_assign(0, conditions)"
+        "df_train.pool_sqft.loc[conditions] = 0"
       ],
       "execution_count": 0,
       "outputs": []
@@ -606,7 +961,7 @@
         "# where there is no basement\n",
         "conditions = df_train['basement_flag'] == 0\n",
         "# fun fact: we just did this with the pool\n",
-        "df_train['basement_sqft'] = df_train['basement_sqft'].masked_assign(0, conditions) "
+        "df_train.basement_sqft.loc[conditions] = 0"
       ],
       "execution_count": 0,
       "outputs": []
@@ -630,14 +985,27 @@
       "metadata": {
         "id": "OZM6lXmmpj5k",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "1d5124b4-31fa-43ae-ae0b-712ac79fde3b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 50
+        }
       },
       "source": [
         "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
         "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "there are 80668 fireplace_count nulls\n",
+            "there are 90053 fireplaceflag nulls\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -657,25 +1025,41 @@
       "metadata": {
         "id": "i3YRZgU_qZhA",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "a6231c9e-37cd-4766-9743-c85f3aa61654",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 50
+        }
       },
       "source": [
         "# null flags with null counts are zero\n",
         "conditions = ((df_train['fireplace_count'].isna()==True) \n",
         "              & (df_train['fireplaceflag'].isna()==True))\n",
-        "df_train['fireplaceflag'] = df_train['fireplaceflag'].masked_assign(False, conditions)\n",
+        "df_train.fireplaceflag.loc[conditions] = False\n",
         "\n",
         "# true flags for positive fireplace counts\n",
         "conditions = df_train['fireplace_count'] > 0\n",
-        "df_train['fireplaceflag'] = df_train['fireplaceflag'].masked_assign(True, conditions)\n",
+        "df_train.fireplaceflag.loc[conditions] = True\n",
         "\n",
         "# set fireplace count nulls to 0 where false flags are\n",
         "conditions = ((df_train['fireplace_count'].isna()==True) \n",
         "              & (df_train['fireplaceflag']==False))\n",
-        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(0, conditions)"
+        "df_train.fireplace_count.loc[conditions] = 0\n",
+        "\n",
+        "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
+        "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "there are 222 fireplace_count nulls\n",
+            "there are 0 fireplaceflag nulls\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -697,11 +1081,11 @@
       },
       "source": [
         "garage = ['garagecarcnt', 'garage_sqft']\n",
-        "# where garage car count and garage square feet are null, set both to 0\n",
+        "# where garage car count and garage square feet are null\n",
         "conditions = ((df_train['garagecarcnt'].isna()==True) \n",
         "              & (df_train['garage_sqft'].isna()==True))\n",
-        "for i in garage:\n",
-        "  df_train[i] = df_train[i].masked_assign(0, conditions)"
+        "for col in garage:  # set both columns to 0, one column at a time\n",
+        "  df_train[col].loc[conditions] = 0"
       ],
       "execution_count": 0,
       "outputs": []
@@ -722,15 +1106,32 @@
       "metadata": {
         "id": "gbbUIbwJ-ouS",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "115cac03-580c-477e-b5c3-0d191c333b2d",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 118
+        }
       },
       "source": [
         "# show rows where garage count and square feet don't add up\n",
         "conditions = (df_train.garagecarcnt > 0) & (df_train.garage_sqft == 0)\n",
-        "print(df_train.loc[conditions][garage])"
+        "print(df_train.loc[conditions][garage].head())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "    garagecarcnt  garage_sqft\n",
+            "16           2.0          0.0\n",
+            "29           1.0          0.0\n",
+            "32           1.0          0.0\n",
+            "35           1.0          0.0\n",
+            "36           2.0          0.0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -754,7 +1155,7 @@
         "# where garage count and square feet don't add up\n",
         "conditions = (df_train.garagecarcnt>0) & (df_train.garage_sqft==0)\n",
         "# insert a NaN value\n",
-        "df_train['garage_sqft'] = df_train['garage_sqft'].masked_assign(cupy.nan, conditions)"
+        "df_train.garage_sqft.loc[conditions] = np.nan"
       ],
       "execution_count": 0,
       "outputs": []
@@ -787,14 +1188,13 @@
         "\n",
         "# if full_bath is null & half_bath is null\n",
         "conditions = ((df_train['full_bath'].isnull()==True) \n",
-        "              & (df_train['half_bath'].isnull()==True))\n",
+        "              & (df_train['half_bath'].isnull()==True) \n",
+        "              & (df_train['total_bath']==0))\n",
         "# total_bath=0\n",
-        "df_train['total_bath'] = df_train['total_bath'].masked_assign(0, conditions)\n",
+        "df_train.total_bath.loc[conditions] = np.nan\n",
         "\n",
-        "# when full_bath==total_bath\n",
-        "conditions = df_train.full_bath == df_train.total_bath\n",
-        "# half_bath=0 \n",
-        "df_train['half_bath'] = df_train['half_bath'].masked_assign(0, conditions)"
+        "# when full_bath==total_bath, half_bath=0 \n",
+        "df_train.half_bath.loc[df_train.full_bath == df_train.total_bath] = 0"
       ],
       "execution_count": 0,
       "outputs": []
@@ -821,8 +1221,8 @@
         "colab": {}
       },
       "source": [
-        "df_train['latitude'] = [lat/100000 for lat in df_train['latitude']]\n",
-        "df_train['longitude'] = [long/100000 for long in df_train['longitude']]"
+        "df_train['latitude'] = df_train.latitude / 100000\n",
+        "df_train['longitude'] = df_train.longitude / 100000"
       ],
       "execution_count": 0,
       "outputs": []
@@ -844,12 +1244,15 @@
       "metadata": {
         "id": "yHZH4rMNLfBA",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "6ba5f661-caa5-44b8-b492-b9f5708181db",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 202
+        }
       },
       "source": [
         "# where room count is 0, go ahead and NaN it\n",
-        "conditions = df_train['roomcnt'] == 0\n",
-        "df_train['roomcnt'] = df_train['roomcnt'].masked_assign(cupy.nan, conditions)\n",
+        "df_train.roomcnt.loc[df_train['roomcnt'] == 0] = np.nan\n",
         "\n",
         "\"\"\"\n",
         "propertylandusetypeid & unitcnt are related \n",
@@ -876,29 +1279,50 @@
         "for one in ones:\n",
         "  # adjust conditions to one unit indicator\n",
         "  conditions = ((df_train['propertylandusetypeid'] == one) \n",
-        "                & (df_train['unitcnt'].isnull()))\n",
-        "  df_train['unitcnt'] = df_train['unitcnt'].masked_assign(1, conditions)\n",
+        "                & (df_train['unitcnt'].isna()))\n",
+        "  df_train.unitcnt.loc[conditions] = 1\n",
         "\n",
         "# two units \n",
         "twos = [31,47,246]\n",
         "for two in twos:\n",
         "  # adjust conditions to two unit indicator\n",
         "  conditions = ((df_train['propertylandusetypeid'] == two) \n",
-        "                & (df_train['unitcnt'].isnull()))\n",
-        "  df_train['unitcnt'] = df_train['unitcnt'].masked_assign(2, conditions)\n",
+        "                & (df_train['unitcnt'].isna()))\n",
+        "  df_train.unitcnt.loc[conditions] = 2\n",
         "\n",
         "# three units\n",
         "conditions = ((df_train['propertylandusetypeid'] == 247) \n",
-        "              & (df_train['unitcnt'].isnull()))\n",
-        "df_train['unitcnt'] = df_train['unitcnt'].masked_assign(3, conditions)\n",
+        "              & (df_train['unitcnt'].isna()))\n",
+        "df_train.unitcnt.loc[conditions] = 3\n",
         "\n",
         "# four units\n",
         "conditions = ((df_train['propertylandusetypeid'] == 248) \n",
-        "              & (df_train['unitcnt'].isnull()))\n",
-        "df_train['unitcnt'] = df_train['unitcnt'].masked_assign(4, conditions)"
+        "              & (df_train['unitcnt'].isna()))\n",
+        "df_train.unitcnt.loc[conditions] = 4\n",
+        "\n",
+        "# let's see how our unit counts look\n",
+        "print(df_train.unitcnt.value_counts())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "1.0      86035\n",
+            "2.0       2372\n",
+            "4.0        884\n",
+            "3.0        622\n",
+            "5.0          1\n",
+            "6.0          1\n",
+            "9.0          1\n",
+            "11.0         1\n",
+            "70.0         1\n",
+            "143.0        1\n",
+            "Name: unitcnt, dtype: int32\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -972,7 +1396,7 @@
       "metadata": {
         "id": "8lYcO_T5XKNN",
         "colab_type": "code",
-        "outputId": "0b77457e-0eed-4e21-be79-1df380432abc",
+        "outputId": "2440dccb-bc7d-459c-ae1a-cc31388be45e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 303
@@ -991,9 +1415,9 @@
           "traceback": [
             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
             "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-24-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1135\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1137\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1138\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1139\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    709\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    710\u001b[0m         ):\n\u001b[0;32m--> 711\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    713\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m<ipython-input-28-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1141\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1143\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1145\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    717\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m         ):\n\u001b[0;32m--> 719\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    720\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
             "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
           ]
         }
@@ -1025,7 +1449,11 @@
       "metadata": {
         "id": "Svp6J0cJ5dL0",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "352d2f36-658f-4698-bfdb-5c748b67f0d7",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 67
+        }
       },
       "source": [
         "# if bool 'Y'/None is already set, change string to int bool column via .isna()\n",
@@ -1044,7 +1472,17 @@
         "print(df_train['taxdelinquencyflag'].value_counts())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0    88492\n",
+            "1     1783\n",
+            "Name: taxdelinquencyflag, dtype: int32\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -1058,30 +1496,84 @@
         "    - to 2016 - yyyy \n"
       ]
     },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "lHh95mAIMrMy",
+        "colab_type": "code",
+        "outputId": "244b62b2-299c-4440-83d2-b5545712ba3e",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 67
+        }
+      },
+      "source": [
+        "print(df_train.taxdelinquencyflag.value_counts())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0    88492\n",
+            "1     1783\n",
+            "Name: taxdelinquencyflag, dtype: int32\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
         "id": "6Bic66I9LfGC",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "4311fb13-6d49-44e1-83ef-73e27d4720c4",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 235
+        }
       },
       "source": [
-        "# set year paris -- e.g. from 5 to 2016 - 2005\n",
-        "year_pairs = [(99,2016-1999),(6,2016-2006),(7,2016-2007),(8,2016-2008),\n",
-        "              (9,2016-2009),(10,2016-2010),(11,2016-2011),(12,2016-2012),\n",
-        "              (13,2016-2013),(14,2016-2014),(15,2016-2015)]\n",
-        "# go though year pairs\n",
-        "for pair in year_pairs:\n",
-        "  # tag old value and new value it will be replaced with\n",
-        "  old, new = pair\n",
-        "  # replace old value with new value\n",
-        "  df_train['taxdelinquencyyear'] = df_train['taxdelinquencyyear'].replace(old, \n",
-        "                                                                          new)\n",
-        "# what're we lookin at?\n",
-        "print(df_train['taxdelinquencyyear'].value_counts())"
+        "# no delinquency? set year to 0\n",
+        "df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyflag == 0] = 0\n",
+        "# collect x and xx formatted delinquency years w/ matching xxxx year format pair\n",
+        "year_pairs = [(99,1999), (6,2006), (7,2007), (8,2008), (9,2009), (10,2010),\n",
+        "             (11,2011), (12,2012), (13,2013), (14,2014), (15,2015)]\n",
+        "# go through the pairs individually \n",
+        "for year in year_pairs:\n",
+        "  # split the pair in question \n",
+        "  old, new = year\n",
+        "  # replace old year (e.g. 99) with new year (e.g. 1999)\n",
+        "  df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear == old] = new\n",
+        "\n",
+        "# adjust delinquency year relative to training year (2016) \n",
+        "df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear>0] = 2016 - df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear>0]\n",
+        "# what've we got? \n",
+        "print(df_train.taxdelinquencyyear.value_counts())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0.0     88492\n",
+            "2.0       628\n",
+            "1.0       518\n",
+            "3.0       210\n",
+            "4.0       154\n",
+            "6.0        89\n",
+            "5.0        85\n",
+            "7.0        63\n",
+            "8.0        24\n",
+            "9.0         8\n",
+            "10.0        3\n",
+            "17.0        1\n",
+            "Name: taxdelinquencyyear, dtype: int32\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -1099,12 +1591,14 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "b3sh8aGovTLT",
+        "id": "AWx7lq0xkDV2",
         "colab_type": "code",
         "colab": {}
       },
       "source": [
-        "print(df_train['rawcensustractandblock'].head())"
+        "# checkpoint df_train on first run, restore it on re-runs (`del safe` to re-checkpoint)\n",
+        "safe = safe if 'safe' in globals() else df_train.copy()\n",
+        "df_train = safe.copy()"
       ],
       "execution_count": 0,
       "outputs": []
@@ -1112,39 +1606,106 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "AJrFMIuvvqUr",
+        "id": "Sg0eN-K1QdZy",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "0e6ca58c-3b13-4c9e-c902-4d8a9c98a855",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 474
+        }
       },
       "source": [
-        "# using series instead of dataframe\n",
-        "tractnumber = s_rawcensustractandblock.values_to_string()\n",
-        "# adjust tract number\n",
-        "for i in range(len(cudf_tractnumber)):\n",
-        "  funct = slice(4,11)\n",
-        "  tractnumber[i] = tractnumber[i][funct]\n",
-        "# set new tract number column\n",
-        "df_train['census_tractnumber'] = census_tractnumber\n",
+        "# copy rawcensustractandblock with values as string instead of float\n",
+        "string_data = cudf.Series(df_train['rawcensustractandblock'].values_to_string())\n",
+        "\n",
+        "# print(type(string_data))\n",
+        "# print(len(string_data))\n",
+        "# print(string_data)\n",
+        "\n",
+        "\"\"\"\n",
+        "CURRENT ERROR IN CONVERSION OF VALUES\n",
+        "\"\"\"\n",
+        "print(f\"\\nNOTE: THERE APPEARS TO BE AN ERROR WHEN CONVERTING TO STRING\\n\"\n",
+        "      f\"  > somewhat random numbers added to end of some values\\n    >> e.g. 004, 006\"\n",
+        "      f\"\\n\\n\\ndf_train['rawcensustractandblock'].head(10).values\\n\"\n",
+        "      f\"{df_train['rawcensustractandblock'].head(10).values}\\n\\n\"\n",
+        "      f\"data.head(10).values\\n{string_data.head(10).values}\\n\\n\\n\"\n",
+        "      f\"THE SAME NUMBERS OCCOUR IN THE FIRST WHEN PUT INTO A LIST\\n\"\n",
+        "      f\"  > not sure how to deal with this now\\n\"\n",
+        "      f\"    >> difficult to reproduce without data\\n\\n\")\n",
+        "\"\"\"\n",
+        "CURRENT ERROR IN CONVERSION OF VALUES\n",
+        "\"\"\"\n",
+        "\n",
+        "# set new tract number \n",
+        "df_train['census_tractnumber'] = string_data.str.slice(4, 11)\n",
         "\n",
-        "# using series instead of dataframe\n",
-        "block_number = s_rawcensustractandblock.values_to_string()\n",
         "# set/adjust block number\n",
-        "for i in range(len(block_number)):\n",
-        "  funct = slice(11, None)\n",
-        "  block_number[i] = block_number[i][funct]\n",
-        "  block_number[i] = block_number[i][:4]+'.'+block_number[i][4:]+'0'\n",
-        "  block_number[i] = int(round(float(block_number[i]), 0))\n",
-        "  block_number[i] = str(block_number[i]).ljust(4,'0')\n",
-        "# add block number column to dataframe\n",
-        "df_train['block_number'] = block_number\n",
-        "\n",
-        "# rawcensustractandblock values have been converted\n",
-        "df_train = df_train.drop('rawcensustractandblock', axis=1)\n",
-        "# let's see what we've got\n",
-        "print(df_train[['census_tractnumber', 'block_number']].head(3))"
+        "df_train['block_number'] = string_data.str.slice(11)\n",
+        "df_train['block_number'] = df_train.block_number.str.slice(0,4).str.cat(df_train.block_number.str.slice(4), '.')\n",
+        "df_train['block_number'] = df_train.block_number.astype('float').round(0).astype('int')\n",
+        "df_train['block_number'] = df_train.block_number.astype('str').str.ljust(4, '0')\n",
+        "\n",
+        "# drop raw census tract and block column, no longer needed\n",
+        "df_train=df_train.drop('rawcensustractandblock', axis=1)\n",
+        "\n",
+        "\"\"\"\n",
+        "CORRECT NUMBERS THAT SHOULD BE DISPLAYED BY BELOW PRINT STATEMENT\n",
+        "  > currently not being seen due to prior mentioned error\n",
+        "\n",
+        "tractnumber\n",
+        "0    1066.46\n",
+        "1    0524.22\n",
+        "2    4638.00\n",
+        "3    2963.00\n",
+        "4    0423.38\n",
+        "dtype: object\n",
+        "\n",
+        "blocknumber\n",
+        "0    1001\n",
+        "1    2024\n",
+        "2    3004\n",
+        "3    2002\n",
+        "4    1006\n",
+        "dtype: object\n",
+        "\"\"\"\n",
+        "print(df_train[['census_tractnumber', 'block_number']].head())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "NOTE: THERE APPEARS TO BE AN ERROR WHEN CONVERTING TO STRING\n",
+            "  > somewhat random numbers added to end of some values\n",
+            "    >> e.g. 004, 006\n",
+            "\n",
+            "\n",
+            "df_train['rawcensustractandblock'].head(10).values\n",
+            "[60375315.031013   60374625.001017   60590114.012017   60376513.02100401\n",
+            " 60374087.031018   60375759.011001   60590630.044      60374061.011006\n",
+            " 60378001.022007   60590524.19100901]\n",
+            "\n",
+            "data.head(10).values\n",
+            "['60375315.031013004', '60374625.001017004', '60590114.012017', '60376513.021004006', '60374087.031018004', '60375759.011001', '60590630.044', '60374061.011006', '60378001.022007', '60590524.19100901']\n",
+            "\n",
+            "\n",
+            "THE SAME NUMBERS OCCOUR IN THE FIRST WHEN PUT INTO A LIST\n",
+            "  > not sure how to deal with this now\n",
+            "    >> difficult to reproduce without data\n",
+            "\n",
+            "\n",
+            "  census_tractnumber block_number\n",
+            "0            5315.03         1013\n",
+            "1            4625.00         1017\n",
+            "2            0114.01         2017\n",
+            "3            6513.02         1004\n",
+            "4            4087.03         1018\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -1165,7 +1726,11 @@
       "metadata": {
         "id": "xhCosNpXvTVU",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "b8ca9fb3-6c67-4466-d7cc-98ff52504659",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 84
+        }
       },
       "source": [
         "# calculate null value % for each column & frame it\n",
@@ -1185,7 +1750,18 @@
         "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "                       field  percentage\n",
+            "7        buildingclasstypeid    0.999823\n",
+            "3   architecturalstyletypeid    0.997109\n",
+            "33    typeconstructiontypeid    0.996688\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -1216,92 +1792,50 @@
       "metadata": {
         "id": "yB2lzAyopS_S",
         "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# highly related propertylandusetypeid\n",
-        "conditions = df_train['propertylandusetypeid'] == 265\n",
-        "#  unitcnt            360\n",
-        "df_train['unitcnt'] = df_train['unitcnt'].masked_assign(10, conditions)\n",
-        "# let's see what we've got\n",
-        "print(df_train['unitcnt'].value_counts())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ofZIC0EdKJ0Y",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: test ready-----"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "c8Zfn-YhlSBO",
-        "colab_type": "code",
-        "outputId": "2087fa66-8683-4040-a3e1-7654942367b7",
+        "outputId": "2860febf-c7ad-4823-d170-2633c4be8ae5",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 34
+          "height": 218
         }
       },
       "source": [
-        "poolsizesum_mean = df_train.loc[df_train['pool_count'] > 0].pool_sqft.mean()\n",
-        "\"\"\"\n",
-        "NEEDS TO BE CONFIRMED WITH OG\n",
-        "> is this supposed to only consider if pool_sqft > 0 as well?\n",
-        "\"\"\"\n",
-        "poolsizesum_mean"
+        "# unitcnt is highly related to propertylandusetypeid: type 265 gets a unit count of 10\n",
+        "df_train['unitcnt'].loc[df_train['propertylandusetypeid'] == 265] = 10\n",
+        "# let's see what we've got\n",
+        "print(df_train['unitcnt'].value_counts())"
       ],
       "execution_count": 0,
       "outputs": [
         {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "28.13881906038769"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          },
-          "execution_count": 86
+          "output_type": "stream",
+          "text": [
+            "1.0      86035\n",
+            "2.0       2372\n",
+            "4.0        884\n",
+            "3.0        622\n",
+            "10.0       356\n",
+            "5.0          1\n",
+            "6.0          1\n",
+            "9.0          1\n",
+            "11.0         1\n",
+            "70.0         1\n",
+            "143.0        1\n",
+            "Name: unitcnt, dtype: int32\n"
+          ],
+          "name": "stdout"
         }
       ]
     },
     {
-      "cell_type": "code",
+      "cell_type": "markdown",
       "metadata": {
-        "id": "cA30ozCWo5x3",
-        "colab_type": "code",
-        "outputId": "fda7011f-6bee-4b60-e137-ec04d05e440b",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 118
-        }
+        "id": "iR1rBlz-dOdH",
+        "colab_type": "text"
       },
       "source": [
-        "print(df_train.loc[df_train['pool_count'] > 0].pool_sqft.head())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "5    0.0\n",
-            "8    0.0\n",
-            "11    0.0\n",
-            "13    0.0\n",
-            "23    0.0\n",
-            "Name: pool_sqft, dtype: float64\n"
-          ],
-          "name": "stdout"
-        }
+        "- a number of pool sizes are null despite there being a pool\n",
+        "  - let's calculate the average pool size\n",
+        "    - and assume those null values are pools of average size"
       ]
     },
     {
@@ -1309,35 +1843,23 @@
       "metadata": {
         "id": "-icFDeLSoJwl",
         "colab_type": "code",
-        "outputId": "9c5035bd-b766-4509-c5a8-f3a475093dd4",
+        "outputId": "5ea8e799-3105-4601-82d4-54bd00c5056b",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 907
+          "height": 34
         }
       },
       "source": [
-        "print(df_train.loc[df_train.pool_count>0].pool_sqft.value_counts())\n",
-        "print(df_train.pool_sqft.value_counts())\n",
-        "print(df_train.loc[df_train.pool_count>0].pool_sqft.isna().sum())\n",
-        "print(df_train.pool_sqft.isna().sum())\n",
-        "\n",
-        "\n",
-        "\n",
         "# calculate the average pool square footage for properties with a pool(s)\n",
-        "new_value = df_train.loc[df_train['pool_count'] > 0, 'pool_sqft'].mean()\n",
+        "poolsizesum_mean = df_train.pool_sqft.loc[df_train['pool_count'] > 0].mean()\n",
         "\n",
         "# where the property has a pool(s) but pool square feet is 0\n",
         "conditions = ((df_train['pool_count'] > 0) \n",
-        "              & (df_train['pool_sqft'] == 0))\n",
+        "              & (df_train['pool_sqft'].isna()))\n",
         "\n",
         "# set pool square feet to the average pool square footage of pool properties\n",
-        "df_train['pool_sqft'] = df_train['pool_sqft'].masked_assign(new_value, conditions)\n",
-        "\n",
+        "df_train['pool_sqft'].loc[conditions] = poolsizesum_mean\n",
         "\n",
-        "print(df_train.loc[df_train.pool_count>0].pool_sqft.value_counts())\n",
-        "print(df_train.pool_sqft.value_counts())\n",
-        "print()\n",
-        "print(df_train.loc[df_train.pool_count>0].pool_sqft.isna().sum())\n",
         "print(df_train.pool_sqft.isna().sum())"
       ],
       "execution_count": 0,
@@ -1345,191 +1867,70 @@
         {
           "output_type": "stream",
           "text": [
-            "0.0    16932\n",
-            "450.0      105\n",
-            "400.0       41\n",
-            "800.0       39\n",
-            "500.0       36\n",
-            "600.0       35\n",
-            "512.0       30\n",
-            "480.0       27\n",
-            "648.0       18\n",
-            "420.0       17\n",
-            "[264 more rows]\n",
-            "dtype: int64\n",
-            "0.0    89306\n",
-            "450.0      105\n",
-            "400.0       41\n",
-            "800.0       39\n",
-            "500.0       36\n",
-            "600.0       35\n",
-            "512.0       30\n",
-            "480.0       27\n",
-            "648.0       18\n",
-            "420.0       17\n",
-            "[264 more rows]\n",
-            "dtype: int64\n",
-            "0\n",
-            "0\n",
-            "28.13881906038769    16932\n",
-            "450.0      105\n",
-            "400.0       41\n",
-            "800.0       39\n",
-            "500.0       36\n",
-            "600.0       35\n",
-            "512.0       30\n",
-            "480.0       27\n",
-            "648.0       18\n",
-            "420.0       17\n",
-            "[264 more rows]\n",
-            "dtype: int64\n",
-            "0.0    72374\n",
-            "28.13881906038769    16932\n",
-            "450.0      105\n",
-            "400.0       41\n",
-            "800.0       39\n",
-            "500.0       36\n",
-            "600.0       35\n",
-            "512.0       30\n",
-            "480.0       27\n",
-            "648.0       18\n",
-            "[265 more rows]\n",
-            "dtype: int64\n",
-            "\n",
-            "0\n",
             "0\n"
           ],
           "name": "stdout"
         }
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AyGeXJfEmJBU",
+        "colab_type": "text"
+      },
+      "source": [
+        "- total parcel tax\n",
+        "- structure tax\n",
+        "- land tax"
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
         "id": "3pVABkZTYK9F",
         "colab_type": "code",
-        "outputId": "42a0b5cc-42e2-41c5-8fdd-11485c45c933",
+        "outputId": "345e4225-6a09-4fae-efb3-c9abe56c622a",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 774
-        }
-      },
-      "source": [
-        "# where total tax and land tax are both greater than 0\n",
-        "\n",
-        "# TESTING (SWITCH TO OG)\n",
-        "# test = df_train.copy()\n",
-        "# test.loc[(test.total_parcel_tax>0) & (test.land_tax>0),'structure_tax']=test['total_parcel_tax']-test['land_tax']\n",
-        "hmm = df_train.loc[(df_train.total_parcel_tax>0) & (df_train.land_tax>0)]\n",
-        "print(f'{len(hmm)} rows where total and land are greater than 0')\n",
-        "print(f'{len(df_train)} total rows, hopefully the same as above number')\n",
-        "print()\n",
-        "print(len(hmm.loc[hmm.structure_tax!=hmm['total_parcel_tax']-hmm['land_tax']]))\n",
-        "print()\n",
-        "print(hmm.loc[hmm.structure_tax!=hmm['total_parcel_tax']-hmm['land_tax']])\n",
-        "print()\n",
-        "any_neg = hmm.loc[hmm.total_parcel_tax < hmm.land_tax]\n",
-        "# if this comes back as 0, setting all structures to total - land should work\n",
-        "print(f'{len(any_neg)} total taxes are less than same rows land tax\\n')\n",
-        "print(any_neg)\n",
-        "# SWITCH TO RAPIDS \n",
-        "\"\"\"current concern\n",
-        "are there places where total and land are not greater than 0 \n",
-        "and setting structure to their difference is not the best move\"\"\"\n",
-        "\n",
-        "\n",
-        "# # structure tax should be equal to total tax minus land tax\n",
-        "# df_train['structure_tax'] = df_train['total_parcel_tax'] - df_train['land_tax']\n",
-        "new_value = df_train['total_parcel_tax'] - df_train['land_tax']\n",
-        "conditions = (df_train.total_parcel_tax>0) & (df_train.land_tax>0)\n",
-        "df_train['structure_tax'] = df_train['structure_tax'].masked_assign(new_value, conditions)\n",
-        "\n",
-        "# # where structure tax is 0\n",
-        "conditions = df_train['structure_tax'] == 0\n",
-        "# # we do not know the structure tax, so insert a Nan value\n",
-        "df_train['structure_tax'] = df_train['structure_tax'].masked_assign(cupy.nan, conditions)\n",
-        "\n",
-        "# print(test.isna().sum())\n",
-        "# print(test.value_counts().head())\n",
-        "# print(test_1.isna().sum())\n",
-        "# print(test_1.value_counts().head())\n",
-        "\n",
-        "\n",
-        "# SWITCH TO OG \n",
-        "\"\"\"\n",
+          "height": 84
+        }
+      },
+      "source": [
         "#total_parcel_tax\n",
         "#structure_tax\n",
         "#land_tax\n",
         "#total_property_tax_2016\n",
-        "#2)recalculate total_parcel_tax =structure_tax + land_tax\n",
+        "#2)recalculate total_parcel_tax = structure_tax + land_tax\n",
         "\n",
         "# total_parcel_tax =structure_tax + land_tax\n",
         "#->structure_tax=total_parcel_tax -land_tax\n",
         "\n",
-        "df_train.loc[(df_train.total_parcel_tax>0) & (df_train.land_tax>0),'structure_tax']=df_train['total_parcel_tax']-df_train['land_tax']\n",
+        "# rows with positive parcel and land taxes: structure tax = parcel tax - land tax\n",
+        "taxed_rows = (df_train.total_parcel_tax>0) & (df_train.land_tax>0)\n",
+        "parcel_taxes = df_train.total_parcel_tax.loc[taxed_rows]\n",
+        "land_taxes = df_train.land_tax.loc[taxed_rows]\n",
+        "df_train['structure_tax'].loc[taxed_rows] = parcel_taxes - land_taxes\n",
         "\n",
-        "#structure_tax, i see a lot of structure tax is 0's, those must be NA's\n",
+        "# a structure tax of 0 is treated as unknown, so mark it as missing (NaN)\n",
+        "df_train.structure_tax.loc[df_train.structure_tax==0] = np.nan\n",
         "\n",
-        "df_train.loc[df_train.structure_tax==0,'structure_tax']=np.nan\n",
-        "\"\"\"\n",
         "print(df_train.total_property_tax_2016.isnull().sum())\n",
         "print(df_train.structure_tax.isnull().sum())\n",
         "print(df_train.total_parcel_tax.isnull().sum())\n",
-        "print(df_train.land_tax.isnull().sum())\n",
-        "\n",
-        "# SWITCH TO RAPIDS\n",
-        "# print(test[['structure_tax','land_tax','total_parcel_tax']])"
+        "print(df_train.land_tax.isnull().sum())"
       ],
       "execution_count": 0,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "90274 rows where total and land are greater than 0\n",
-            "90275 total rows, hopefully the same as above number\n",
-            "\n",
-            "379\n",
-            "\n",
-            "    parcelid               logerror  ac_id  basement_sqft  total_bath  bedroomcnt  buildingqualitytypeid ...  census_tractnumber\n",
-            "266  17188959                 0.0944                   0.0         0.0         0.0                        ...             0056.00\n",
-            "297  12956410   -0.14850000000000002                   0.0         0.0         0.0                        ...             4080.05\n",
-            "336  12966610                 0.0488                   0.0         6.0         9.0                    7.0 ...             4303.01\n",
-            "454  17188961                  0.003                   0.0         0.0         0.0                        ...             0056.00\n",
-            "474  17188974    0.10260000000000001                   0.0         0.0         0.0                        ...             0056.00\n",
-            "555  17266056                -0.5175                   0.0         0.0         0.0                        ...             0059.08\n",
-            "601  17205423                 0.0733                   0.0         0.0         0.0                        ...             0076.06\n",
-            "790  10858080    0.05450000000000001                   0.0         2.0         3.0                    7.0 ...             1412.01\n",
-            "791  10858080    0.08620000000000001                   0.0         2.0         3.0                    7.0 ...             1412.01\n",
-            "976  11325190  -0.024300000000000002                   0.0         0.0         0.0                        ...             9102.06\n",
-            "[369 more rows]\n",
-            "[38 more columns]\n",
-            "\n",
-            "0 total taxes are less than same rows land tax\n",
-            "\n",
-            "Empty DataFrame\n",
-            "Columns: ['parcelid', 'logerror', 'ac_id', 'basement_sqft', 'total_bath', 'bedroomcnt', 'buildingqualitytypeid', 'census_tractnumber']\n",
-            "Index: []\n"
+            "6\n",
+            "380\n",
+            "1\n",
+            "1\n"
           ],
           "name": "stdout"
-        },
-        {
-          "output_type": "error",
-          "ename": "ValueError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-93-bdfcd5900cba>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[0mnew_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total_parcel_tax'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'land_tax'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0mconditions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal_parcel_tax\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mland_tax\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'structure_tax'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'structure_tax'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_value\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconditions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m \u001b[0;31m# # where structure tax is 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mmasked_assign\u001b[0;34m(self, value, mask)\u001b[0m\n\u001b[1;32m   1073\u001b[0m         \"\"\"\n\u001b[1;32m   1074\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1075\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmasked_assign\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1076\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_copy_construct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1077\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36mmasked_assign\u001b[0;34m(self, value, mask)\u001b[0m\n\u001b[1;32m    494\u001b[0m             \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_gpu_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    495\u001b[0m             \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmask_invert\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 496\u001b[0;31m             \u001b[0mvalue\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    497\u001b[0m         )\n\u001b[1;32m    498\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnull_count\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/utils/cudautils.py\u001b[0m in \u001b[0;36mfill_mask\u001b[0;34m(data, mask, value)\u001b[0m\n\u001b[1;32m    235\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    236\u001b[0m         \u001b[0mconfigured\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgpu_fill_masked\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 237\u001b[0;31m         \u001b[0mconfigured\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    238\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    222\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    223\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAutoJitCUDAKernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 224\u001b[0;31m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspecialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    225\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    226\u001b[0m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36mspecialize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    761\u001b[0m         '''\n\u001b[1;32m    762\u001b[0m         argtypes = tuple(\n\u001b[0;32m--> 763\u001b[0;31m             [self.typingctx.resolve_argument_type(a) for a in args])\n\u001b[0m\u001b[1;32m    764\u001b[0m         \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    765\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    761\u001b[0m         '''\n\u001b[1;32m    762\u001b[0m         argtypes = tuple(\n\u001b[0;32m--> 763\u001b[0;31m             [self.typingctx.resolve_argument_type(a) for a in args])\n\u001b[0m\u001b[1;32m    764\u001b[0m         \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margtypes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    765\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/typing/context.py\u001b[0m in \u001b[0;36mresolve_argument_type\u001b[0;34m(self, val)\u001b[0m\n\u001b[1;32m    296\u001b[0m         \"\"\"\n\u001b[1;32m    297\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 298\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mtypeof\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPurpose\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margument\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    299\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    300\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mnumba\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_cuda_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/typing/typeof.py\u001b[0m in \u001b[0;36mtypeof\u001b[0;34m(val, purpose)\u001b[0m\n\u001b[1;32m     32\u001b[0m         msg = _termcolor.errmsg(\n\u001b[1;32m     33\u001b[0m             \"cannot determine Numba type of %r\") % (type(val),)\n\u001b[0;32m---> 34\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     35\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mValueError\u001b[0m: cannot determine Numba type of <class 'cudf.dataframe.series.Series'>"
-          ]
         }
       ]
     },
@@ -1538,7 +1939,11 @@
       "metadata": {
         "id": "8SID48LOpYvu",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "1d369c4a-759e-4331-b5fe-6c784ae66897",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
       },
       "source": [
         "# regionidcounty is exact copy of fips code, dropping the dulicate column\n",
@@ -1546,29 +1951,37 @@
         "df_train.shape"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(90275, 45)"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 51
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "tWmM2J8_pkg1",
         "colab_type": "code",
-        "outputId": "2393cbab-218f-4849-c32c-700495dfb18e",
+        "outputId": "44689c09-a426-48c9-eae8-7e81af63080e",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 622
+          "height": 34
         }
       },
       "source": [
         "#*******************************\n",
         "#bedroomcnt #1421 zero bed room houses ??, observed it's missing all other room count also missing\n",
-        "print(df_train.bedroomcnt.value_counts())\n",
-        "\n",
-        "conditions = df_train['bedroomcnt'] == 0\n",
-        "df_train['bedroomcnt'] = df_train['bedroomcnt'].masked_assign(cupy.nan, conditions)\n",
-        "\n",
-        "\n",
-        "print(df_train.bedroomcnt.value_counts())\n",
+        "# where bedroomcnt is 0, null is a better representation\n",
+        "df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0] = np.nan\n",
         "print(df_train.bedroomcnt.isnull().sum())"
       ],
       "execution_count": 0,
@@ -1576,38 +1989,9 @@
         {
           "output_type": "stream",
           "text": [
-            "3.0    35447\n",
-            "2.0    22357\n",
-            "4.0    20279\n",
-            "5.0     5077\n",
-            "1.0     3897\n",
-            "0.0     1421\n",
-            "6.0     1120\n",
-            "8.0      274\n",
-            "7.0      234\n",
-            "9.0       91\n",
-            "[7 more rows]\n",
-            "dtype: int64\n"
+            "1421\n"
           ],
           "name": "stdout"
-        },
-        {
-          "output_type": "error",
-          "ename": "RuntimeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-94-29ba50e2a85d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, method, sort)\u001b[0m\n\u001b[1;32m   1827\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnull_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1828\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mint64\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1829\u001b[0;31m         \u001b[0mvals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcnts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1830\u001b[0m         \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcnts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1831\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/numerical.py\u001b[0m in \u001b[0;36mvalue_counts\u001b[0;34m(self, method)\u001b[0m\n\u001b[1;32m    215\u001b[0m             \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"non sort based value_count() not implemented yet\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    216\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m         \u001b[0msegs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msortedvals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unique_segments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    218\u001b[0m         \u001b[0;31m# Return both values and their counts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m         \u001b[0mout_vals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcpp_copying\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_gather_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msortedvals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msegs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36m_unique_segments\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    684\u001b[0m         \u001b[0mdensecol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_dense_buffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    685\u001b[0m         \u001b[0;31m# sort the column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 686\u001b[0;31m         \u001b[0msortcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdensecol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_by_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    687\u001b[0m         \u001b[0;31m# find segments\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    688\u001b[0m         \u001b[0msortedvals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msortcol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/numerical.py\u001b[0m in \u001b[0;36msort_by_values\u001b[0;34m(self, ascending, na_position)\u001b[0m\n\u001b[1;32m    161\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0msort_by_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"last\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 163\u001b[0;31m         \u001b[0msort_inds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_sorted_inds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    164\u001b[0m         \u001b[0mcol_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcpp_copying\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_gather_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort_inds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    165\u001b[0m         col_inds = self.replace(\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/_sort.py\u001b[0m in \u001b[0;36mget_sorted_inds\u001b[0;34m(by, ascending, na_position)\u001b[0m\n\u001b[1;32m     77\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Must use a boolean or list of booleans\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m     \u001b[0mcpp_sort\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_order_by\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mby\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol_inds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mascending\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_position\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mcol_inds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32mcudf/bindings/sort.pyx\u001b[0m in \u001b[0;36mcudf.bindings.sort.apply_order_by\u001b[0;34m()\u001b[0m\n",
-            "\u001b[0;32mcudf/bindings/sort.pyx\u001b[0m in \u001b[0;36mcudf.bindings.sort.apply_order_by\u001b[0;34m()\u001b[0m\n",
-            "\u001b[0;31mRuntimeError\u001b[0m: merge_sort: failed to synchronize: an illegal memory access was encountered"
-          ]
         }
       ]
     },
@@ -1616,10 +2000,10 @@
       "metadata": {
         "id": "3qnP2L9LpmeJ",
         "colab_type": "code",
-        "outputId": "bc0119de-0644-414f-bf59-bd132c7c0e15",
+        "outputId": "a4e9550d-5ea8-4066-d3f3-ea73bfe04cef",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 387
+          "height": 101
         }
       },
       "source": [
@@ -1631,8 +2015,8 @@
         "#                              roomcnt           1416\n",
         "\n",
         "\n",
-        "# roomcnt=(full_bath+half_bath)+ bedroomcnt\n",
-        "# total_bath=fullbath+ 0.5(half_bath)\n",
+        "# roomcnt = (full_bath + half_bath) + bedroomcnt\n",
+        "# total_bath = full_bath + 0.5 * half_bath\n",
         "\n",
         "#caluculate full bath and half bath again from total bath as, it has few extra columns, (fixes 500 missing values in roomcnt )\n",
         "\n",
@@ -1642,19 +2026,15 @@
         "              & (df_train['bedroomcnt'].isna() == False) \n",
         "              & (df_train['roomcnt'].isna() == True))\n",
         "# calculate room count including all full & half baths along with bedroom count\n",
-        "new_values = df_train.full_bath + df_train.half_bath + df_train.bedroomcnt\n",
-        "df_train['roomcnt'] = df_train['roomcnt'].masked_assign(new_values, conditions)\n",
-        "\n",
-        "\"\"\"df_train.loc[(df_train.full_bath.notnull()) \n",
-        "             & (df_train.half_bath.notnull()) \n",
-        "             & (df_train.bedroomcnt.notnull()) \n",
-        "             & (df_train.roomcnt.isnull()),['roomcnt']]=df_train.full_bath + df_train.half_bath + df_train.bedroomcnt\"\"\"\n",
+        "new_values = df_train.full_bath.loc[conditions] + df_train.half_bath.loc[conditions] + df_train.bedroomcnt.loc[conditions]\n",
+        "# df_train['roomcnt'] = df_train['roomcnt'].masked_assign(new_values, conditions)\n",
+        "df_train.roomcnt.loc[conditions] = new_values\n",
         "\n",
         "\n",
         "# most bedroom count and roomcount null are in same place\n",
         "# all column null count 1133 all columns are null\n",
         "\n",
-        "print(df_train.total_bath.isnull().sum())\n",
+        "print(df_train.total_bath.isna().sum())\n",
         "print(df_train.full_bath.isnull().sum())\n",
         "print(df_train.half_bath.isnull().sum())\n",
         "print(df_train.bedroomcnt.isnull().sum())\n",
@@ -1665,28 +2045,13 @@
         {
           "output_type": "stream",
           "text": [
-            "ERROR:Call to cuOccupancyMaxPotentialBlockSize results in UNKNOWN_CUDA_ERROR\n"
+            "1165\n",
+            "1182\n",
+            "1182\n",
+            "1421\n",
+            "1416\n"
           ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "error",
-          "ename": "CudaAPIError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mCudaAPIError\u001b[0m                              Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-95-d46f327c0313>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'half_bath'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m               \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'bedroomcnt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m               & (df_train['roomcnt'].isna() == True))\n\u001b[0m\u001b[1;32m      5\u001b[0m \u001b[0;31m# calculate room count including all full & half baths along with bedroom count\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_bath\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhalf_bath\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbedroomcnt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36misna\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1238\u001b[0m         \"\"\"Identify missing values in a Series. Alias for isnull.\n\u001b[1;32m   1239\u001b[0m         \"\"\"\n\u001b[0;32m-> 1240\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1241\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1242\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mnotna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36misnull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1232\u001b[0m             )\n\u001b[1;32m   1233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1234\u001b[0;31m         \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcudautils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnullmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1235\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1236\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/utils/cudautils.py\u001b[0m in \u001b[0;36misnull_mask\u001b[0;34m(data, mask)\u001b[0m\n\u001b[1;32m    432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    433\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 434\u001b[0;31m         \u001b[0mgpu_isnull\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    435\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0moutput_dary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    436\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m    226\u001b[0m             \u001b[0mkernel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 228\u001b[0;31m         \u001b[0mtpb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_thread_per_block\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    229\u001b[0m         \u001b[0mtpbm1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtpb\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    230\u001b[0m         \u001b[0mblkct\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mntasks\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtpbm1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mtpb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/compiler.py\u001b[0m in \u001b[0;36m_compute_thread_per_block\u001b[0;34m(self, kernel)\u001b[0m\n\u001b[1;32m    249\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    250\u001b[0m                 \u001b[0;31m# Raises from the driver if the feature is unavailable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 251\u001b[0;31m                 \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtpb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_max_potential_block_size\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    252\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    253\u001b[0m                 \u001b[0;31m# Fallback to table-based approach.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36mget_max_potential_block_size\u001b[0;34m(self, func, b2d_func, memsize, blocksizelimit, flags)\u001b[0m\n\u001b[1;32m    646\u001b[0m                                                     \u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    647\u001b[0m                                                     \u001b[0mb2d_cb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 648\u001b[0;31m                                                     memsize, blocksizelimit)\n\u001b[0m\u001b[1;32m    649\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    650\u001b[0m             driver.cuOccupancyMaxPotentialBlockSizeWithFlags(byref(gridsize), byref(blocksize),\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36msafe_cuda_api_call\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m    288\u001b[0m             \u001b[0m_logger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'call driver api: %s'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibfn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    289\u001b[0m             \u001b[0mretcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlibfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretcode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    291\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0msafe_cuda_api_call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numba/cuda/cudadrv/driver.py\u001b[0m in \u001b[0;36m_check_error\u001b[0;34m(self, fname, retcode)\u001b[0m\n\u001b[1;32m    323\u001b[0m                     \u001b[0m_logger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcritical\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_getpid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    324\u001b[0m                     \u001b[0;32mraise\u001b[0m \u001b[0mCudaDriverError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"CUDA initialized before forking\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 325\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mCudaAPIError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mretcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    326\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    327\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mget_device\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevnum\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mCudaAPIError\u001b[0m: [700] Call to cuOccupancyMaxPotentialBlockSize results in UNKNOWN_CUDA_ERROR"
-          ]
+          "name": "stdout"
         }
       ]
     },
@@ -1709,12 +2074,16 @@
       "metadata": {
         "id": "IW4CG2InpolD",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "47e46700-fe9c-4b98-9941-014ee6dea441",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 252
+        }
       },
       "source": [
-        "# before\n",
-        "print(df_train.numberofstories.isnull().sum())\n",
-        "print(df_train.numberofstories.value_counts())\n",
+        "# before (what's it look like?)\n",
+        "print(f'BEFORE\\n{df_train.numberofstories.value_counts()}\\n'\n",
+        "      f'{df_train.numberofstories.isnull().sum()} remaining null values\\n')\n",
         "\n",
         "#numberofstories\t69705\n",
         "\n",
@@ -1725,121 +2094,126 @@
         "# go through each id pair \n",
         "for type_id in zillow_type_ids:\n",
         "  # split the pair into type id and number of stories\n",
-        "  id, n_stories = type_id\n",
+        "  t_id, n_stories = type_id\n",
         "\n",
         "  # when type id matches and story count is not null\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == id) \n",
+        "  conditions = ((df_train['propertylandusetypeid'] == t_id) \n",
         "                & (df_train['numberofstories'].isna() == False))\n",
+        "  \n",
         "  # calculate the mode story count for matching id properties\n",
-        "  mode_stories = df_train.loc[conditions, 'numberofstories'].mode()\n",
+        "  mode_stories = df_train.numberofstories.loc[conditions].value_counts()\n",
+        "  # when there is at least one value in the value_counts of this property type\n",
+        "  if len(mode_stories) > 0:\n",
+        "    # value_counts() maps value -> count; keep the most frequent VALUE (the label), not its count\n",
+        "    mode_stories = mode_stories.to_pandas().idxmax()\n",
+        "  # otherwise\n",
+        "  else:\n",
+        "    # fall back to the story count paired with this property type\n",
+        "    mode_stories = n_stories\n",
+        "  \n",
         "  # and set those non null values to the most common value seen\n",
-        "  df_train['numberofstories'] = df_train['numberofstories'].masked_assign(mode_stories, \n",
-        "                                                                          conditions)\n",
+        "  df_train['numberofstories'].loc[conditions] = mode_stories\n",
         "  \n",
         "  # when type id matches and story count is null\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == id) \n",
+        "  conditions = ((df_train['propertylandusetypeid'] == t_id) \n",
-        "                & (df_train['numberofstories'].isna() == False))\n",
+        "                & (df_train['numberofstories'].isna() == True))\n",
         "  # set null values to the common number of stories seen in that type id\n",
-        "  df_train['numberofstories'] = df_train['numberofstories'].masked_assign(n_stories, \n",
-        "                                                                          conditions)\n",
-        "  \n",
-        "# TO BE ADDRESSED\n",
-        "# #https://en.wikipedia.org/wiki/Townhouse , typical town house are usually large, and has atleast 6 rooms\n",
-        "# df_train.loc[(df_train.propertylandusetypeid==264) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "\"\"\"\n",
-        "df_train.loc[(df_train.propertylandusetypeid==246) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==246) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==247) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==247) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==248) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==248) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==260) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==260) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==261) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==261) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
-        "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==263) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==263) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "  df_train['numberofstories'].loc[conditions] = n_stories\n",
         "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==266) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==266) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
+        "# edge cases\n",
+        "conditions = ((df_train.propertylandusetypeid==264) \n",
+        "              & (df_train.numberofstories.isnull()))\n",
+        "df_train.numberofstories.loc[conditions] = 2\n",
         "\n",
-        "df_train.loc[(df_train.propertylandusetypeid==269) & (df_train.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==269) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "prop2016.loc[(prop2016.propertylandusetypeid==275) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==275) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
-        "\n",
-        "prop2016.loc[(prop2016.propertylandusetypeid==267) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==267) & (df_train.numberofstories.isnull()),'numberofstories']=1\n",
-        "\n",
-        "#https://en.wikipedia.org/wiki/Townhouse , typical town house are usually large, and has atleast 6 rooms\n",
-        "df_train.loc[(df_train.propertylandusetypeid==264) & (df_train.numberofstories.isnull()),'numberofstories']=2\n",
-        "\n",
-        "prop2016.loc[(prop2016.propertylandusetypeid==31) & (prop2016.numberofstories.notnull()),'numberofstories'].mode()\n",
-        "df_train.loc[(df_train.propertylandusetypeid==31) & (df_train.numberofstories.isnull()),'numberofstories']=2\"\"\"\n",
-        "\n",
-        "# after\n",
-        "print(df_train.numberofstories.isnull().sum())\n",
-        "print(df_train.numberofstories.value_counts())"
+        "# what's it looking like? \n",
+        "print(f'AFTER\\n{df_train.numberofstories.value_counts()}\\n'\n",
+        "      f'{df_train.numberofstories.isnull().sum()} remaining null values')"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "BEFORE\n",
+            "1.0    12016\n",
+            "2.0     8044\n",
+            "3.0      508\n",
+            "4.0        2\n",
+            "Name: numberofstories, dtype: int32\n",
+            "69705 remaining null values\n",
+            "\n",
+            "AFTER\n",
+            "1.0    20154\n",
+            "2.0      423\n",
+            "3.0        4\n",
+            "Name: numberofstories, dtype: int32\n",
+            "69694 remaining null values\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "AHcMsDCxprd4",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3a327d21-4675-41ce-aa9e-f52ae86eb491",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 286
+        }
       },
       "source": [
-        "\"\"\"skeptical of this0 cell (and the one above)..\n",
-        "author provides no explination for moding\"\"\"\n",
-        "\n",
-        "# before\n",
-        "print(df_train.fireplace_count.isnull().sum())\n",
-        "print(df_train.fireplace_count.value_counts())\n",
+        "# before (what's it looking like?) \n",
+        "print(f'BEFORE\\n{df_train.fireplace_count.value_counts()}\\n'\n",
+        "      f'{df_train.fireplace_count.isnull().sum()} remaining null values\\n')\n",
         "\n",
         "# where there is a fire place, and count is not null\n",
         "conditions = ((df_train.fireplaceflag==1) \n",
         "              & (df_train.fireplace_count.isna() == False))\n",
         "# calculate the mode fireplace count \n",
-        "mode_fire_count = df_train.loc[conditions, 'fireplace_count'].mode()\n",
+        "mode_fire_count = df_train.loc[conditions, 'fireplace_count'].value_counts().to_pandas().idxmax()\n",
         "# and set those non null values to the most common fireplace count\n",
-        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(mode_fire_count, \n",
-        "                                                                        conditions)\n",
+        "df_train['fireplace_count'].loc[conditions] = mode_fire_count\n",
         "\n",
         "# where there is a fire place, and count is null\n",
         "conditions = ((df_train.fireplaceflag==1) \n",
         "              & (df_train.fireplace_count.isna() == True))\n",
         "# set null values to the most common fireplace count\n",
-        "df_train['fireplace_count'] = df_train['fireplace_count'].masked_assign(1, \n",
-        "                                                                        conditions)\n",
+        "df_train.fireplace_count.loc[conditions] = 1\n",
         "\n",
         "# df_train.loc[(df_train.fireplaceflag==1) & (df_train.fireplace_count.notnull()),'fireplace_count'].mode()\n",
         "# df_train.loc[(df_train.fireplaceflag==1) & (df_train.fireplace_count.isnull()),'fireplace_count']=1\n",
         "\n",
         "# after\n",
-        "print(df_train.fireplace_count.isnull().sum())\n",
-        "print(df_train.fireplace_count.value_counts())"
+        "print(f'AFTER\\n{df_train.fireplace_count.value_counts()}\\n'\n",
+        "      f'{df_train.fireplace_count.isnull().sum()} remaining null values')"
       ],
       "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "DVgF1c_p_bN1",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: break-----"
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "BEFORE\n",
+            "0.0    80446\n",
+            "1.0     8165\n",
+            "2.0     1106\n",
+            "3.0      312\n",
+            "4.0       21\n",
+            "5.0        3\n",
+            "Name: fireplace_count, dtype: int32\n",
+            "222 remaining null values\n",
+            "\n",
+            "AFTER\n",
+            "0.0       80446\n",
+            "8165.0     9607\n",
+            "1.0         222\n",
+            "Name: fireplace_count, dtype: int32\n",
+            "0 remaining null values\n"
+          ],
+          "name": "stdout"
+        }
       ]
     },
     {
@@ -1847,36 +2221,75 @@
       "metadata": {
         "id": "FIuSWoJspt3H",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "9c5daebd-4b2a-461b-8490-350d19fa7ba8",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 317
+        }
       },
       "source": [
-        "import seaborn as sns\n",
+        "\n",
+        "# set basic sns \n",
         "color = sns.color_palette()\n",
         "sns.set(style=\"darkgrid\")\n",
-        "\n",
-        "\n",
-        "ax = sns.countplot(x=\"buildingqualitytypeid\", data=df_train)\n",
-        "\n",
+        "# convert dataframe to pandas for ease of use with sns\n",
+        "pd_train = df_train.to_pandas()\n",
+        "# set ax plot\n",
+        "ax = sns.countplot(x=\"buildingqualitytypeid\", data=pd_train)\n",
+        "# adjust fringe aesthetics\n",
         "plt.xticks(rotation='vertical')\n",
-        "plt.title(\"Frequency of Bathroom count\", fontsize=15)\n",
+        "plt.title(\"Frequency of Building Quality Type\", fontsize=15)\n",
+        "# display the graph\n",
         "plt.show()"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAEsCAYAAACFRGf6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XlYVGX/P/D3DAiCghMIBFQuGEii\nYhBgYiaKIKGoaeKammtuZe4S9OAWiGEuqaVfzUxNS0UWBZdv+eRjmpUVov6U3NmUAXEDYeb8/uDL\neRwBHVC4B3y/rovrYs59Zs7n3LO859znzDkKSZIkEBER1TKl6AKIiOjZxAAiIiIhGEBERCQEA4iI\niIRgABERkRAMICIiEoIB9AxasWIFXFxcyv2NGDFCdGn1WkpKCgIDA+Hm5gZ/f/8K57l06ZLOc+Lq\n6oo333wT4eHhyMvLq/IyExMTsXv37nLTBw0ahA8//LDKj0dVc/LkSaxcuVJ0GQbLWHQBJIaFhQXW\nrVtXbhrVjJKSEsyaNQt+fn5YsGABGjVq9Mj558yZA3d3d2g0GqSnpyM2NhaZmZn46quvqrTcxMRE\n3L17F3369HmS8qmaTp48iS+++AKTJk0SXYpBYgA9o4yMjODu7q73/IWFhWjYsGENVlS/ZWVl4e7d\nu+jduzc8PT0fO3/Lli3l58fDwwOFhYVYvHix8OdB9PKpfuEQHJVTUlICFxcXfP3111iwYAF8fHx0\nvkGnpKSgX79+aNu2LXx9fRETE4OSkhKdx0hKSkKPHj3Qrl07DBs2DH/99RdcXFwQFxens4ytW7fq\n3C82NhadOnXSmXbt2jV88MEHeO2119C+fXuMHj0aFy9elNvLhq2Sk5MRFhYGDw8PvPHGG1i5ciUe\nPtHHmTNnMHbsWHh4eKBDhw545513cPToURQXF+P111/HF198Ua4/Bg0ahClTpjyyzxITExEcHAw3\nNze8+eab+Pzzz6HRaAAAO3bsQLdu3QAAY8eOhYuLS4XLeZRGjRpBq9VCq9XK03744QeEhobCy8sL\nXl5eePfdd3Hq1Cm5ffr06Th48CCOHj0qD+k9vNzdu3eje/fuePXVVzFmzBhkZ2fLbWX9mpiYiOnT\np8PT0xMTJ04EAGg0GixbtgxdunSBm5sbgoODkZiYWKV+KesbFxcXnD59GkOGDEH79u3Rt29fnD59\nGnfu3MGsWbPw6quvonv37khKSnpsP2k0GqxevRo9evSAm5sb3njjDcydO1dnnk2bNsHf3x9ubm7o\n0aMHNm3apNM+ffp0vPPOOzrTyvri8OHDAP77+t28eTNiYmLg7e2Njh07Yv78+bh//768bosXL4ZG\no+EwdyW4BfQMezg0jIyMoFAo5NtfffUVvL29ER0dLX+Qx8fHY+bMmRg0aBCmTZuGS5cuYenSpQBK\n37gA8Ndff+Gjjz5CQEAAwsLCcObMGXzwwQfVqlGtVmPQoEFo2rQpIiMjYWpqirVr12LUqFHYt28f\nTExM5HmjoqIQEBCA5cuX4+eff8aKFSvg7OyMHj16AADOnTuHQYMGwcnJCZGRkWjSpAlSU1ORmZmJ\nBg0aICQkBLt378b7778vP+bFixfx+++/48svv6y0xp9++gnTpk1Dv379MHPmTJw5cwbLly/HzZs3\nER4ejm7dusHCwgJTp06Vh9bs7e0fud6SJKGkpARarRbnz5/Hhg0b0KlTJ5ibm8vzZGRkoF+/fnjx\nxRdx//597NmzB0OGDEFiYiIcHR0xZcoUZGVlobCwEGFhYQCgs9zff/8dWVlZmDNnDu7evYtFixYh\nIiICa9as0all8eLFcr8qlaXfWT/77DN8/fXXmDRpEtq0aYO9e/di2rRpUCqV6Nmzp1798qCZM2di\n6NChGDt2LGJiYjB16lS0bt0
aLVu2xIoVK7B9+3bMnDkTnp6esLW1rbTf5s2bh4SEBIwZMwaenp7I\nz8/HgQMH5PYtW7Zg0aJFGDlyJDp16oSjR49i0aJFKC4uxnvvvffI56Qi69atw+uvv46YmBicPn0a\nsbGxeOGFFzBy5Eh069YN586dw+bNm7FlyxYAHOYuR6JnzvLlyyVnZ+dyf0eOHJEkSZKKi4slZ2dn\nqV+/fjr302g0UufOnaV58+bpTN+2bZvUrl07KT8/X5IkSZo4caIUHBwsabVaeZ4VK1ZIzs7O0u7d\nu3WWsWXLFp3H+uyzz6TXX39dvh0TEyN5e3tLN2/elKep1WrJ3d1d2rp1qyRJknTx4kXJ2dlZmj17\nts5jvfXWW9JHH30k3548ebL05ptvSoWFhRX2y/nz5yVnZ2fp119/lactXbpU8vX1lUpKSiq8jyRJ\nUt++faURI0boTFu9erXk6uoqZWdn69T4008/Vfo4D8738F9wcLCUlZVV6f00Go1UXFwsde/eXVq9\nerU8fcKECdK7775bbv7Q0FDJ09NTKigokKetW7dOcnFxkYqKinRqmTx5ss59c3NzpbZt20pffPGF\nzvSRI0dKQUFBVeqX7du3S87OzlJcXJw8z4EDByRnZ2cpLCxMnpafny+1bt1a+u677yrtg7Nnz0rO\nzs7S5s2bK2wvLi6WXn/99XKv37CwMMnT01Ne748++kgaMGCAzjwPP39lr99hw4bpzDd27FgpNDRU\nvr1hwwbJ1dW10pqfdRyCe0ZZWFjg+++/1/lr166dzjxvvvmmzu309HRkZ2ejZ8+eKCkpkf98fHxQ\nWFiI8+fPAyjdAvLz89PZmqrsqK/H+c9//gNfX1+Ym5vLy7OwsECbNm2QmpqqM6+vr6/ObScnJ50h\npWPHjiEoKAimpqYVLsvJyQkdOnTAzp07AQBarRZxcXHo06cPjIyMKrxPcXExzpw5g8DAQJ3pQUFB\n0Gg0+PPPP6u8zgAQFhaG77//Hjt27MDKlSvRsGFDjB07Fvfu3ZPnOXfuHN5//328/vrrcHV1RZs2\nbXD58mWd4clHadeunc438latWkGSJOTk5OjM9/Dr4OzZsygqKiq3zj179sT58+eRn59f5X7p2LGj\n/H+zZs0AAD4+PvK0Jk2aQKVS6TyfDzt27BgAoF+/fhW2Z2Zm4saNGxXWVFBQIL9+q+Jxrzl6NA7B\nPaOMjIzQtm3bR85jbW2tc7vsMOBRo0ZVOH9WVhYAIDc3t9x9H76tr7y8PKSmpiI+Pr5c28NB8vDw\nRoMGDVBUVASgdEgrPz8fNjY2j1xe//79sWjRInz88cc4ceIEsrKyKv1AA0qHCDUaTaXrm5+f/8jl\nVaZZs2Y6z0+HDh3g6+uL3bt3Y9CgQbh16xZGjRoFOzs7zJkzB/b29jA1NcXcuXPldX4cS0tLndsN\nGjQAgHL3f3jdrl+/DgBo2rSpzvSy2wUFBSgqKqpSvzxYS1kdj3o+K5Kfnw8LCwuYmZlV2F4WrA/X\nXVbTzZs3K33sylS1RtLFAKJKPbgFA5R+CwWARYsWwdnZudz8L774IoDSN3Rubq5O28O3jYyMYGxs\njOLiYp3pBQUFOrdVKhVeeeUVjBs3rtzyGjdurOealK6LSqWSPzwrExQUhEWLFiE5ORmHDx/Gq6++\nihYtWlQ6v5WVFYyMjKBWq3Wml62vSqXSu8ZHadq0KZo0aYL09HQApftvcnJysHnzZnmLASjff0/D\nw6+DshDPzc3V+QC+ceMGgNIwadSoUa30y4NUKhVu3bqFe/fuVRhCZfuOKnttlr2+TU1Ny70uqxNO\n9HgcgiO9tWrVCjY2Nrh27Rratm1b7q/sQ6Vt27Y4dOiQzhFo+/fv13kshUIBOzs7+QMVKD2C6ejR\nozrz+fj44Ny5c3BxcSm3vEcFQ0V8fHyQlJQkH6VUEXNzcwQFBeGbb77BgQMHHrn1A5R+43V1d
cW+\nfft0pu/duxdGRkZo3759lWqsTHZ2NvLz8+WDCAoLCwFA5yCMX3/9Vd4KfbC+p/2N3MXFBaampuXW\ned++fWjVqhVUKlWt9cuDyobxKvrhLQA4ODigadOmFdbUpEkTtGrVCgDw/PPP4+rVqzqvkyNHjlSr\npgYNGkCj0ZQ74IdKcQuI9GZkZISZM2di7ty5KCgoQOfOnWFsbIwrV65g//79WL16NUxMTDBmzBiE\nhoZi2rRp6Nu3L86ePSvvV3lQ9+7dsX37drRu3RoODg7YsWOH/MFa5r333kNCQgKGDx+OoUOHwtbW\nFjdu3MDx48fh5eWFoKAgveufMmUK+vfvj6FDh2LEiBFQqVQ4deoUmjZtir59+8rz9e/fHwMHDoS5\nubl8RNfjHnfs2LGYN28eAgMDcebMGaxYsQKhoaGPPGLrUf755x9YWlpCkiRkZWVh3bp1sLS0lNe3\nQ4cOMDMzQ1hYGEaNGoWMjAysWrWq3PJatmyJw4cP48CBA7Czs4OdnV21aypjZWWFYcOGYeXKlVAq\nlXjllVewb98+/Pzzz1i2bJk8X030y6O0atUKb7/9NhYuXIgbN27Aw8MDN2/exIEDB7B06VIYGRlh\n4sSJiIyMhKWlJTp27Ihjx45h+/btmDFjhhzm/v7+WLlyJcLCwtCnTx+kpqZWGmqP07JlSwDAxo0b\n4eXlBQsLiyp/carPGEBUJb1794alpSXWrl2L77//HkqlEi+99BK6du0KY+PSl5O7uzuWLl2K2NhY\nHDhwAO3atUNsbGy531ZMmTIFeXl5iI2NRYMGDTBs2DA4OTnh+++/l+extrbG9u3bERsbi0WLFqGg\noAC2trbw8PCAi4tLlWp3cnLCli1bEBMTg3nz5kGhUODll18ud0oad3d3NG3aFJ07d9ZrmK9Lly5Y\nunQp1qxZg7i4OFhZWWH06NGYPHlylep70OLFi+X/mzZtirZt22LhwoXyFpCtrS0+//xzREVFYfz4\n8WjRogUiIyOxevVqnccZOnQozp49izlz5qCgoABTp07VOcy8uj788EM0aNAAmzdvhlqtRvPmzbF0\n6VKdwK6Jfnmc+fPnw9HRETt37sTatWthbW2Nzp07y+2DBw9GcXExvvnmG3z99dewt7fHnDlz8O67\n78rztG7dGgsWLMDatWuRkpICHx8fLFy4EEOGDKlyPT4+Phg5ciQ2btyImJgY+Pj4YOPGjU9jVesF\nhSTxktxU8woKCvDaa68hOjoaISEhost5pDNnziAkJATffPMNvLy8RJdDVG9xC4jo/6jValy4cAHL\nli1D69atGT5ENYwHIRD9n4MHD2LIkCHIy8vTGQIjoprBITgiIhKCW0BERCQEA4iIiITgQQiVyMu7\nA62Wo5NERPpQKhV47rlHX2jxYQygSmi1EgOIiKgG1doQ3Pvvv4/evXujT58+GDx4ME6fPg0AuHDh\nAgYOHIiAgAAMHDhQ50y+NdFGRESGodaOgrt165Z84sIDBw5g1apV2LVrF4YPH463334bISEhiIuL\nww8//CBfobAm2vSVm3ubW0BERHpSKhWwttb/BMFALW4BPXjW3Nu3b0OhUCA3NxdpaWkIDg4GAAQH\nByMtLQ1qtbpG2oiIyHDU6j6gefPm4ciRI5AkCevWrUNmZibs7Ozki30ZGRnB1tYWmZmZkCTpqbdZ\nWVnpXWtVk5yIiKqmVgNo4cKFAEpPlx4dHY2pU6fW5uKrhENwRET6M+ghuAf16dMHx44dw/PPP4/s\n7GxoNBoApdeDycnJgb29Pezt7Z96GxERGY5aCaA7d+4gMzNTvn3o0CE0adIE1tbWcHV1RUJCAgAg\nISEBrq6usLKyqpE2IiIyHLVyFNyNGzfw/vvv4969e1AqlWjSpAlmzZqFNm3aID09HbNnz0ZBQQEs\nLS0RFRUlX8SpJtr0xSE4IiL9VWcIjicjrQQD6NmmsjBBg
4amostAcWER8m9VfglxIkNRnQDimRCI\nKtCgoSmSho8UXQaCNm0AGEBUT/FkpEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAR\nEQnBACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQERE\nJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJIRxbSwkLy8PM2fO\nxOXLl2FiYoJmzZohMjISVlZWcHFxgbOzM5TK0iyMjo6Gi4sLAODQoUOIjo6GRqNBmzZtsHjxYpiZ\nmT1RGxERGYZa2QJSKBQYPXo0kpOTER8fjxdffBExMTFy+7Zt2xAXF4e4uDg5fO7cuYOPP/4Ya9as\nwf79+9GoUSOsX7/+idqIiMhw1EoAqVQqeHt7y7fd3d2RkZHxyPscPnwYbm5uaN68OQAgNDQUe/fu\nfaI2IiIyHLUyBPcgrVaLrVu3ws/PT542bNgwaDQavPHGG5g8eTJMTEyQmZkJBwcHeR4HBwdkZmYC\nQLXbqsLaunGV70NUE2xsLESXQFQjaj2A5s+fD3NzcwwdOhQA8OOPP8Le3h63b9/GjBkzsGrVKnz4\n4Ye1XVY5ubm3odVKossgQQzpQ//69VuiSyB6LKVSUeUv7rV6FFxUVBQuXbqEZcuWyQcd2NvbAwAa\nN26MAQMG4Pfff5enPzhMl5GRIc9b3TYiIjIctRZAn332GVJTU7Fq1SqYmJgAAG7evInCwkIAQElJ\nCZKTk+Hq6goA6Ny5M/7++29cvHgRQOmBCj179nyiNiIiMhy1MgR37tw5rF27Fs2bN0doaCgA4IUX\nXsDo0aMRHh4OhUKBkpISdOjQAVOnTgVQukUUGRmJcePGQavVwtXVFfPmzXuiNiIiMhwKSZK4o6MC\n3Af0bLOxsUDS8JGiy0DQpg3cB0R1gsHvAyIiIirDACIiIiFq/TBsqjnPNTGBsYmp0BpK7hch7+Z9\noTUQUd3AAKpHjE1M8Vv0aKE1eMxcB4ABRESPxyE4IiISggFERERCMICIiEgIBhAREQnBACIiIiEY\nQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAA\nERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJEStBFBeXh7GjBmDgIAA9OrVC5Mm\nTYJarQYAnDx5Er1790ZAQABGjRqF3Nxc+X410UZERIahVgJIoVBg9OjRSE5ORnx8PF588UXExMRA\nq9VixowZCA8PR3JyMjw9PRETEwMANdJGRESGo1YCSKVSwdvbW77t7u6OjIwMpKamwtTUFJ6engCA\n0NBQ7Nu3DwBqpI2IiAxHre8D0mq12Lp1K/z8/JCZmQkHBwe5zcrKClqtFvn5+TXSRkREhsO4thc4\nf/58mJubY+jQodi/f39tL15v1taNRZdQZ9nYWIguoV5hf1J9VasBFBUVhUuXLmHNmjVQKpWwt7dH\nRkaG3K5Wq6FUKqFSqWqkrSpyc29Dq5WeYG1rn6F8UF2/fkt0CU/MUPoSqB/9SfWfUqmo8hf3WhuC\n++yzz5CamopVq1bBxMQEAODm5obCwkKcOHECALBt2zYEBgbWWBsRERmOWtkCOnfuHNauXYvmzZsj\nNDQUAPDCCy9g1apViI6ORkREBIqKiuDo6IglS5YAAJRK5VNvIyIiw6GQJKlujTPVkro6BPdb9Gih\nNXjMXFcvhoxsbCyQNHyk6DIQtGlDvehPqv8MegiOiIjoQQwgIiISggFERERCMICIiEgIBhAREQnB\nACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQmhdwCtX7++wukbNmx4\nasUQEdGzQ+8AWrVqV
YXTV69e/dSKISKiZ8djL0h39OhRAIBWq8Uvv/yCBy8fdPXqVTRq1KjmqiMi\nonrrsQE0b948AEBRURHmzp0rT1coFLCxsUFYWFjNVUdERPXWYwPo0KFDAICZM2ciOjq6xgsiIqJn\nw2MDqMyD4aPVanXalEoeTEdERFWjdwCdOnUKkZGROHv2LIqKigAAkiRBoVDg9OnTNVYgERHVT3oH\n0OzZs9G1a1csWrQIDRs2rMmaiIjoGaB3AF27dg0ffvghFApFTdZDRETPCL133vj7++Pnn3+uyVqI\niOgZovcWUFFRESZNmgQPDw80bdpUp41HxxERUVXpHUCtWrVCq1atarIWIiJ6hugdQJMmTarJOoiI\n6BmjdwCVnZKnIh07dnwqxRAR0bND7wAqOyVPmby8PBQXF8POzg4HDx587P2joqKQnJyMa9euIT4+\nHs7OzgAAPz8/mJiYwNTUFAAwffp0dO7cGQBw8uRJhIeHo6ioCI6OjliyZAmsra2fqI2IiAyD3kfB\nHTp0SOfvxIkTGD9+PIYOHarX/bt164Zvv/0Wjo6O5dqWL1+OuLg4xMXFyeGj1WoxY8YMhIeHIzk5\nGZ6enoiJiXmiNiIiMhzVPoeOkZERxo8fj3Xr1uk1v6enJ+zt7fV+/NTUVJiamsLT0xMAEBoain37\n9j1RGxERGQ69h+AqcuTIkafyw9Tp06dDkiR4eHhg2rRpsLS0RGZmJhwcHOR5rKysoNVqkZ+fX+02\nlUqld03W1o2feL2eVTY2FqJLqFfYn1Rf6R1AXbp00Qmbe/fu4f79+4iIiHiiAr799lvY29vj/v37\nWLhwISIjIw1iyCw39za0WunxMxoQQ/mgun79lugSnpih9CVQP/qT6j+lUlHlL+56B9CSJUt0bpuZ\nmaFFixZo3PjJthTKhuVMTEwwePBgTJgwQZ6ekZEhz6dWq6FUKqFSqardRkREhkPvfUBeXl7w8vKC\np6cnmjdvjjZt2jxx+Ny9exe3bpV+u5MkCUlJSXB1dQUAuLm5obCwECdOnAAAbNu2DYGBgU/URkRE\nhkPvLaDbt28jMjISSUlJKCkpgbGxMd566y2EhYXBwuLxwxULFixASkoKbty4gZEjR0KlUmHNmjWY\nPHkyNBoNtFotnJyc5CE9pVKJ6OhoRERE6BxO/SRtRERkOBSSJOm1o2P27Nm4c+cOpk2bBkdHR1y7\ndg2xsbEwMzNDVFRUTddZ6+rqPqDfokcLrcFj5rp6sc/CxsYCScNHii4DQZs21Iv+pPqvRvcB/fvf\n/8aBAwdgZmYGAGjRogUWL14Mf3//qlVJRESEKuwDMjU1hVqt1pmWl5cHExOTp14UERHVf3pvAfXv\n3x+jRo3CiBEj4ODggIyMDGzcuBEDBgyoyfqIiKie0juAJkyYADs7O8THxyMnJwe2trYYPXo0A4iI\niKpF7yG4hQsXokWLFti4cSOSkpKwceNGODk5YeHChTVZHxER1VN6B1BCQgLc3Nx0prm5uSEhIeGp\nF0VERPWf3gGkUCig1Wp1ppX9foeIiKiq9A4gT09PfP7553LgaLVarFixQj7rNBERUVVU6YJ048aN\ng6+vLxwcHJCZmQkbGxusWbOmJusjIqJ6Su8Aev7557Fr1y789ddfyMzMhL29Pdq1awelstqXFCIi\nomdYla4HpFQq4e7uDnd395qqh4iInhHcfCEiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERC\nMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEiIWgmgqKgo\n+Pn5wcXFBf/v//0/efqFCxcwcOBABAQEYODAgbh48WKNthERkeGolQDq1q0bvv32Wzg6OupMj4iI\nwODBg5GcnIzBgwcjPDy8RtuIiMhw1EoAeXp6wt7eXmdabm4u0tLSEBwcDAAIDg5GWlo
a1Gp1jbQR\nEZFhqdIluZ+mzMxM2NnZwcjICABgZGQEW1tbZGZmQpKkp95mZWVVpfqsrRs/xbV9ttjYWIguoV5h\nf1J9JSyADF1u7m1otZLoMqrEUD6orl+/JbqEJ2YofQnUj/6k+k+pVFT5i7uwALK3t0d2djY0Gg2M\njIyg0WiQk5MDe3t7SJL01NuIiMiwCDsM29raGq6urkhISAAAJCQkwNXVFVZWVjXSRkREhkUhSVKN\njzMtWLAAKSkpuHHjBp577jmoVCokJiYiPT0ds2fPRkFBASwtLREVFYWWLVsCQI20VUVdHYL7LXq0\n0Bo8Zq6rF0NGNjYWSBo+UnQZCNq0oV70J9V/1RmCq5UAqosYQNXDAHq6GEBUV1QngHgmBCIiEoIB\nREREQjCAiIhICAYQEREJwQAiIiIhGEBERCQEA4iIiIRgABERkRAMICIiEoIBREREQjCAiIhICAYQ\nEREJwQAiIiIhGEBERCQEA4iIiIRgABERkRAMICIiEoIBREREQjCAiIhICAYQEREJwQAiIiIhGEBE\nRCQEA4iIiIRgABERkRAMICIiEoIBREREQhiLLgAA/Pz8YGJiAlNTUwDA9OnT0blzZ5w8eRLh4eEo\nKiqCo6MjlixZAmtrawCodhsRERkGg9kCWr58OeLi4hAXF4fOnTtDq9VixowZCA8PR3JyMjw9PRET\nEwMA1W4jIiLDYTAB9LDU1FSYmprC09MTABAaGop9+/Y9URsRERkOgxiCA0qH3SRJgoeHB6ZNm4bM\nzEw4ODjI7VZWVtBqtcjPz692m0qlqtV1IiKiyhlEAH377bewt7fH/fv3sXDhQkRGRsLf319oTdbW\njYUuvy6zsbEQXUK9wv6k+sogAsje3h4AYGJigsGDB2PChAkYPnw4MjIy5HnUajWUSiVUKhXs7e2r\n1VYVubm3odVKT7hmtctQPqiuX78luoQnZih9CdSP/qT6T6lUVPmLu/B9QHfv3sWtW6VvMEmSkJSU\nBFdXV7i5uaGwsBAnTpwAAGzbtg2BgYEAUO02IiIyHMK3gHJzczF58mRoNBpotVo4OTkhIiICSqUS\n0dHRiIiI0DmcGkC124iIyHAoJEmqW+NMtaSuDsH9Fj1aaA0eM9fViyEjGxsLJA0fKboMBG3aUC/6\nk+q/OjkER0REzyYGEBERCcEAIiIiIRhAREQkBAOIiIiEYAAREZEQDCAiIhKCAUREREIwgIiISAgG\nEBERCcEAIiIiIYSfjLQusLBsiIamDYTWUFhUjFsFhUJrICJ6mhhAemho2gCDZ34rtIYt0UNwCwwg\nIqo/OARHRERCMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAAERGREPwdEBHVuCaWJjAxNRVd\nBu4XFeFmwX3RZdD/YQARUY0oWKqDAAAQ7ElEQVQzMTXFZ3PGiS4D0xavBcAAMhQcgiMiIiEYQERE\nJAQDiIiIhGAAERGREAwgIiISot4G0IULFzBw4EAEBARg4MCBuHjxouiSiIjoAfU2gCIiIjB48GAk\nJydj8ODBCA8PF10SERE9oF7+Dig3NxdpaWnYsGEDACA4OBjz58+HWq2GlZWVXo+hVCp0bjd9rtFT\nr7OqHq6pIiaW1rVQyaPpU2ddYNZUfF8C9ac/LVXsz6fFwsIUJiZir9J8/34xbt0qkm9Xp18VkiRJ\nT7MoQ5CamopZs2YhMTFRnhYUFIQlS5agTZs2AisjIqIy9XYIjoiIDFu9DCB7e3tkZ2dDo9EAADQa\nDXJycmBvby+4MiIiKlMvA8ja2hqurq5ISEgAACQkJMDV1VXv/T9ERFTz6uU+IABIT0/H7NmzUVBQ\nAEtLS0RFRaFly5aiyyIiov9TbwOIiIgMW70cgiMiIsPHACIiIiEYQEREJAQDiIiIhGAAERGREAwg\nIiISol6ejJT0c+/ePRw+fBiZmZkASs8g0blzZ5i
bmwuujIieBfwd0DPqxx9/RFhYGNzc3ORTFGVm\nZiI1NRXz589H165dBVf4XxkZGdi3b59OUAYEBMDR0VFwZbpYJxkyQ/zCyQCqIYb+Jg8KCsLq1avR\nrFkznekXL17EhAkTsHfvXkGV6dqxYwdWrlyJ7t276wTlwYMHMXHiRAwYMEBwhaVY59Nn6O+hMnWh\nTkP9wskAqgF14U3eo0cPpKSkVLmttgUEBGDr1q3lzuOnVqsRGhrKOquortRZF95DQN2p01C/cHIf\nUA1Yt24ddu3aVe5NPnHiRISGhhrEi7JNmzYIDw/HwIED4eDgAKD0m9x3330HV1dXwdX9l1arrfAk\nss899xwM6bsT63y66sJ7CKg7dZaUlJQLHwBo3ry5fNUAERhANaAuvMkXL16M9evXY9asWcjIyIBC\noYCDgwMCAgIwZ84c0eXJfH19MXr0aLzzzjs6Qbl9+3Z06tRJcHX/xTqfrrrwHgLqTp2G+oWTQ3A1\n4F//+heuXLlS4Zv8hRdewCeffCK2wDpEq9Viz5492Lt3LzIyMgAADg4OCAwMREhICJRKw/glAet8\nuurKe6iu1FlYWIj169fLz/uDXzjfe+89mJmZCamLAVQD6sqbvDLXr1+HjY2N6DLoGVZX3kN1pU5D\nxQCicvr06YPdu3eLLuOxTp06hTZt2ogu47FYJxkykV84Gc+17NSpU6JLqNR//vMfAKgT4QMAn3/+\nuegS9MI6ny5Dfg89qK7UOWbMGGHL5hZQLRs7diy+/PJL0WXg/Pnz5aa99957+J//+R9IkoRWrVoJ\nqOrx7ty5g4sXL6JZs2Zo3Lix6HLqvHv37iE9PR0vvfQSLC0tRZejF0N5Dz1OXalTJAbQM6p169Zw\ndHTUOVInOzsbdnZ2UCgUOHjwoMDq/is8PBwffPABrKys8Ntvv2Hy5Ml47rnnoFarsWTJEvj6+oou\nEQDg7e2NXr164e233zaow9gftn//fsyaNQu2traIjo7G1KlTYWZmhtzcXCxevBh+fn6iS6RniUS1\nKjg4WHQJkiRJ0ooVK6TRo0dL165dk6d17dpVYEUV69Wrl/z/sGHDpD///FOSJEn6559/pL59+4oq\nq5yuXbtKCxculHx8fKQ+ffpI33zzjZSfny+6rHJCQkKkM2fOSMePH5e8vLyk3377TZIkSTp//rwU\nEhIiuLry1Gq1lJaWJqWlpUlqtVp0OXWWWq2W5s6dK40cOVLavHmzTtukSZMEVSVJ/B1QDahoeKtM\nXl5eLVZSuUmTJiEtLQ3Tpk1DSEgIBg0aBIVCIbqscoqKiuT/79y5g3bt2gEAWrRogeLiYlFlldOk\nSRPMnTsXM2bMwMGDB7Fz504sXboUb775Jvr3729Qv7FxcXEBADRq1AivvvoqAMDJyUlkSeVcvnwZ\nH3/8MdLS0mBrawsAyMnJwSuvvIJ//etfaN68udgC9dCrVy/Ex8eLLgMAEBERgRdeeAFdunTB1q1b\ncfToUSxbtgzGxsa4cuWKsLoYQDUgODi43PBWmfz8fAEVVeyVV17Bpk2bsHz5cowYMcKgPtDLdOzY\nEZ9++immTp0Kb29vJCUlISgoCEeOHIFKpRJdXjkNGjRAYGAgAgMDkZ2djV27dmH+/PnYt2+f6NIA\nAAqFAunp6SgoKMDdu3dx8uRJuLu748KFC0J/Ef+wmTNnYvDgwdiwYYN8KLNWq0V8fDxmzZqF7777\nTnCFperCl02g9JQ7y5cvBwD4+/sjMjIS48aNwxdffCG0Lu4DqgHdunXDli1bYGdnV66tS5cu+Omn\nnwRU9WgnT57E8ePHMXbsWNGl6Lh//z6io6MRFxcHlUqFK1euwNjYGN7e3vjkk0/w4osvii4RQN05\ndP1///d/MWvWLCiVSsTGxuLLL7/E9evXkZWVhU8++QTBwcGiSwQABAYGVhraj2qrbRXtSy2Tk5OD\n1NRUAVWV17N
nz3Lne4uKikJaWhpycnLEnXxY2OBfPfbpp5/KY+sPmz9/fi1XUz/cuXNHOn36tHTq\n1CmD3Bdw9epV0SVUS0lJifT3339L169fF12KjoEDB0rx8fGSVquVp2m1WikuLk4aMGCAwMp0+fn5\nSVlZWRW2vfHGG7VcTeXGjBkjHT9+vNz0pUuXSi4uLgIqKsUtICIyOBcvXkRERAROnz4tjyRkZ2ej\ndevW+OSTT9CyZUvBFZaKioqCv7+/vC/tQQsWLEBYWJiAqsrLz8+HQqFAkyZNyrWdP39e2M8uGEBE\nZLDUarXOdXYqOvEnPRmRB0vwIAQiMlhWVlblQseQji57FEOq01APlmAAEZHBqewDU5Ikgzq6zFA/\n2B9mqEfmMoCIyOAY6gfmw+pKnY6Ojo88MlcUBhARGRxD/cB8WF2ps0ePHrh27VqFdfr7+wuoqBQD\niIgMjqF+YD6srtQ5a9asSttEHqnHo+CIiEgIXg+IiIiEYAAREZEQDCCqc/z8/OSrt1ZFeHg4Vq1a\nBQA4duwY3njjjUrnnT17NmJjYwEAJ06cQEBAQPWKrUUP9suaNWswb948wRU93p49ezBq1KhK24cN\nG4YdO3bUYkVUm3gQAj0zIiMjq3U/T09PJCcnP+Vqatb48ePl/69evYpu3brh1KlTMDZ+/Fv+2LFj\nmDFjBg4fPlyTJQIAevfujd69e9f4csgwcQuIiIiEYABRnfT3338jKCgIr732GubMmYOioiLs3LkT\ngwYN0pnPxcUFly5dAqA7rPawtLQ09O3bFx06dMAHH3ygcyG8h4fr/Pz8sH79evTq1QseHh7l5v/q\nq6/g6+sLX19f7NixQ6eGvLw8jB8/Hq+++ir69++PZcuWyTVfvXoVLi4uKCkpkR/rwSGoy5cvY/jw\n4fD29oa3tzc++ugjFBQUVLg+K1aswPTp0wEAQ4cOBQC89tpr6NChA44fPw4vLy+cPXtWnj83Nxft\n27fHtWvXMGbMGOTk5KBDhw7o0KEDsrOz0b59e51f9p86dQo+Pj4oLi7Gzp07ERoaisjISHh4eCAw\nMBBHjx6V57116xbmzp0LX19fdO7cGbGxsfK1hx5+zo4cOYLAwEB4eHggMjKywh94Uv3BAKI6KT4+\nHuvXr8f+/ftx4cKFJ7qw1v379zFx4kSEhITg+PHjCAwMREpKyiPvs3fvXqxbtw4HDx7E2bNnsXPn\nTgDA4cOHsXHjRmzYsAH79+/HsWPHdO4XGRkJU1NT/Pzzz1i0aBF++OEHveuUJAnjxo3Dv//9b+zd\nuxdZWVlYsWLFY++3efNmAMCvv/6KP/74A15eXggKCsKePXvkeRISEtCxY0c4Ojriq6++gq2tLf74\n4w/88ccfsLOzg5eXl841Y+Li4vDWW2+hQYMGAIC//voLL730En755RdMmTIFkyZNks8EMHv2bBgb\nGyMlJQW7d+/GkSNHKtyvo1arMWnSJHzwwQf45Zdf8NJLL+H333/Xu3+o7mEAUZ00ZMgQ2NvbQ6VS\nYcKECUhMTKz2Y/35558oLi7Gu+++K1/RtG3bto+8z7Bhw2BnZweVSoWuXbvi9OnTAEqDqV+/fnj5\n5ZdhZmaGyZMny/fRaDRISUnBlClTYG5uDmdnZ/Tt21fvOps1a4ZOnTrBxMQEVlZWGDlyJH799ddq\nrXPfvn2RmJgob2HExcU9cl9M37595cDSaDRITExESEiI3G5lZSX3X1BQEFq0aIEff/wRN27cwE8/\n/YS5c+fC3Nwc1tbWGDFiRIXP1+HDh/Hyyy8jMDAQDRo0wLvvvoumTZtWa/2obuBBCFQn2dvby/87\nODggJyen2o+Vk5MDOzs7KBQKncd8FBsbG/l/MzMzefk5OTlwc3OrsE61Wo2SkpJytevrxo0bWLhw\nIU6cOIE7d+5AkiRYWlrqff8HtW/fHg0bNsSxY8dgY2ODy5cvo1u3bpXO361bN
0RERODKlSu4cOEC\nGjdujHbt2sntFfVfTk4OMjIyUFJSAl9fX7lNq9Xq9EGZnJwcPP/88/JthUJR4XxUfzCAqE4qu0YM\nAGRkZMDW1hZmZmYoLCyUp1+/fl2vx7KxsUF2djYkSZI/RDMyMqp1uW9bW1tkZ2dXWKeVlRWMjY2R\nmZkJJyencu3m5uYAgMLCQjRu3LjcOnz22WdQKBSIj4+HSqXCgQMH9Dqy78FgeFDZVo2NjQ0CAgJg\nampa6fympqbo2bMn9uzZg3/++Udn6wdAuf7LzMyEn58fnn/+eZiYmOCXX3557BF4NjY2yMrKkm9L\nkqTTP1T/cAiO6qQtW7YgKysL+fn5WLNmDYKCgtC6dWucO3cOp0+fRlFRkV77RwDA3d0dxsbG2LRp\nE4qLi5GSkoK///67WnUFBgZi586dSE9Px71793T2TRkZGcHf3x8rV67EvXv3cP78eezatUtut7Ky\ngp2dHeLi4qDRaPD999/jypUrcvudO3dgbm4OCwsLZGdnY926dXrVZGVlBaVSqfNYQOkh0AcOHMCe\nPXvQp08febq1tTXy8/Nx69YtnflDQkKwa9cuHDp0qFwAqdVquf/27t2L9PR0dOnSBba2tujUqRM+\n/fRT3L59G1qtFpcvX8bx48fL1dmlSxecO3cOKSkpKCkpwaZNm3Djxg291pHqJgYQ1UnBwcEYNWoU\nunfvjpdeegkTJkxAixYtMHHiRIwYMQI9evSAh4eHXo9lYmKCFStWYNeuXfDy8kJSUlK1TyTZpUsX\nDBs2DMOHD4e/vz/at28vLwMo/THs3bt30alTJ8yePRv9+vXTuf/8+fOxfv16eHt74/z58+jQoYPc\nNmnSJKSlpcHT0xNjx45Fjx499KrJzMwM48ePx6BBg+Dp6YmTJ08CKB0efOWVV6BQKODp6SnP7+Tk\nhLfeegvdu3eHp6envEXn4eEBpVKJNm3awNHRUWcZ7dq1w6VLl+Dj44Nly5Zh+fLleO655wAA0dHR\nKC4ulo9anDJlSoVbp1ZWVvj888+xdOlSeHt749KlSxVe6prqD56MlKgGpaenIzg4GH///XeFQ1A7\nd+7Ejh07sHXrVgHVAXPmzIGtrS0+/PBDveYfPnw4evXqhQEDBsjTRK8D1V3cAiJ6yvbv34/79+/j\n5s2bWLJkCbp27arXGQhq29WrV7F//370799fr/n/+usvpKWloWfPnjVcGT0rGEBET9m2bdvQsWNH\n+Pv7w8jICJ988onokspZtmwZevXqhffee0+vgy1mzZqFkSNHYu7cufIBEkRPikNwREQkBLeAiIhI\nCAYQEREJwQAiIiIhGEBERCQEA4iIiIT4/9XIitKxsMjJAAAAAElFTkSuQmCC\n",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "KOHPCFRSp5y9",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3aa099cd-791f-4a5a-9ea7-29168fc239b9",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 274
+        }
       },
       "source": [
-        "plt.plot(df_train.yearbuilt,df_train.buildingqualitytypeid , 'ro')\n",
+        "# let's look more into year built vs building quality type\n",
+        "plt.plot(pd_train.yearbuilt, pd_train.buildingqualitytypeid, 'ro')\n",
+        "# display the graph\n",
         "plt.show()"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGx1JREFUeJzt3WtwE+ehBuB3JSFjA6ot2YC5ppnG\n1JmG0DqDGTeFYAimDZDWzJSUi5OBhLYpKbRhUkLTkgYCUUlomEBi0kNLSDLwxx7aQE8hHS4pUCg0\nF+oMxdQY8Az4ItkcY4LtSPudHyDFF0nWZaXd9fc+v8iu9tt3V6tX8mqjVYQQAkREJBWL3gGIiCj1\nWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEbHoHaGm5AVUVcLkGw+tt0ztOXJhdH8yeembNDfSf7BaLgqysQQmPqXv5q6qAqorgv82K\n2fXB7Kln1twAs3fF0z5ERBJi+RMRSYjlT0QkoajK3+12o7i4GOPGjUN1dTUAoKWlBU888QRKSkow\ne/ZsLFu2DM3NzUkNS0RE2ojqC99p06ahrKwMCxYsCE5TFAWPP/44CgsLAdx6g3j55Zexfv365CSl\npGs9cRyeygr4mr2AxQKoKmxOF7JL58Ixqaj3YwIsFjgmT8HwhY9GHLPnWPFmC4yTM7sk7m1NRLTb\nFHJf3VYdYtzAWNeO/R3tZ8+GnNfXemxOFzLGj8dnZ850ywcg7HPbdV5g2mf/PY/WD44Aqtrr+a1/\n560v5vWkKEDP24LfXp8yaBAURYHa1hacFjOLBQPHjYOvofHWtqSlAZ2d3dYZbj/Wj78HnZ/7eu1b\nTVitgN/fLWe414RRKLHcwL24uBjl5eXIy8vrNW///v3YtWsXduzYEVMAr7cNqiqQkzMETU3XY1rW\nKPpD9tYTx9GwcwdEZ2evxyh2O4aVPQYAYR8DAI4HpnY72EONGRgrljeAcON8ZdmPodz99ajH0UK0\n2xRpf0YUqjwTXY/VCkAB/L7o5oXJ4HhgKtLT7Wj43/1RboyOIuzHVOr5mohX146xWBS4XIMTHlOT\nc/6qqmLXrl0oLi7WYjjSgaeyImyBiM5OeCorIj4GwK1Pg32MGRgr0WyisxOX3343pnG0EO029bWv\nwopQWHGvx+8PXfzh5oXJ0PrBETTsfz/yuozCAMUP9H5NGIkm1/mvXbsWGRkZWLhwYczLdn0Hy8kZ\nokUcXZg9e3VL5O9rfH3MBwCoarf9EG5MX0tzTPsr3DgdHm/K93u029TX/oxXqtYTUjynaWTX4zWR\nCK2P9YTL3+1249KlSygvL4fFEvsfEjzto69AdluWM+S56QBblhMAIj4GFku3/RBuTFuWM6b9FW6c\ntGxXyvd7tNvU1/5MZP2pWE9Igdc33wSi1+M1ES/DnfbZtGkTqqqqsHXrVtjt9oTDkH6yS+dCCfMc\nKnY7skvnRnwMADgmT+lzzMBYiWZT7HaMWbQgzBLJE+029bWvwlKU8LPiXY/VCljDfM4LNS9MBsfk\nKRhW8mDkdRlFhP2YSj1fE0Ziff7555/v60Hr1q3Dc889h8bGRuzfvx+VlZWYOHEinn76aQwYMAB7\n9uzB7t27cfToUTz00EMxBbh5sxNCAIMGpeGzz+I4R2oA/SF72qjRGOByof3iRag3b976lCcEbE4X\nhj4yH45JRb0fE2CxwDHlgV5fbPV8fNexYhFunNEzpqV8v0e7TWH3VQQ2pwtDFyyE7+Zn8Hk8vedF\nsR6b04XBhZPgb73+Rb75CzD4618P/dz2mBfIYHE40HH58q1z512e39FTivB/9U1fzOspVOneXp8y\naBAsaWm3vqO4PS1mFgsGfvW
rgCpubUtaWq+/RMLtR8f4e6A4Xb32rSas1u7bE+Y1Ea+uHaMoCjIy\nEv+wHdPVPsnA0z76YnZ9mDW7WXMD/Se7IU77EBGRObH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgk\nxPInIpIQy5+ISEIsfyIiCbH8iYgkpMlPOhMRxUqru7xRfFj+RJRyPe9A5mv2omHnDgDgG0CK8LQP\nEaWcVnd5o/ix/Iko5cLdgCZlN6Yhlj8RpZ7N6YppOmmP5U9EKafVXd4ofvzCl4hSLvClLq/20Q/L\nn4h04ZhUxLLXEU/7EBFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\nqM/yd7vdKC4uxrhx41BdXR2cXltbi3nz5qGkpATz5s3DxYsXk5mTiIg01OfPO0ybNg1lZWVYsGBB\nt+lr1qzB/Pnz8fDDD+NPf/oTfv3rX2Pnzp1JC6q3+nfeQusHRwBVBSwWOCZPQcZX7oKnsgLVzV7A\nYgFUNam/UdL1zkdBigIMGAB0dkaVoecY1b0ekTqBnPVv/RH4/POYlw+XfWB+PsY8/Yte0y+/4kb7\n2bNfTEhPB27ejHm9WtBzvyfCrLkBg2VPT0fea2/oGkERQohoHlhcXIzy8nLk5eXB6/WipKQEJ0+e\nhNVqhd/vR2FhIQ4cOACn0xlTAK+3DaoqkJMzBE1N1+PaiGSrf+cttB4+1HuGogAhdp9it2NY2WOa\nvgH0vPNRX0JliHUMM+v5BtCr+In0FsMbQNd+tFgUuFyDE159XOf8r169imHDhsFqtQIArFYrhg4d\niqtXryYcyIhaPzgSekaY981k3JEo1J2PIgmVIdYxzKxn0bP4yXB0+qszQPdf9ez6DpaTM0THJOFV\nq2rMy/hamjXdnuqW5oQzxDOGmXXbdh1zEIUTS0do3Y9xlX9ubi4aGhrg9/uDp30aGxuRm5sb81hm\nOO0TOJceC1uWU9PtsWU5Y77FXc8M8YxhZoY9nohui/YYNcxpH5fLhfz8fOzduxcAsHfvXuTn58d8\nvt8sHJOnhJ6hKKEnJ+GORKHufBRJqAyxjmFmA/PzI/43ke7S03VdfZ9f+K5btw4HDhyAx+NBVlYW\nMjMzsW/fPtTU1GDVqlVobW2Fw+GA2+3GnXfeGXMAU3zyR+SrfXwmvtpHT4le7ROOGa72IcnFeLVP\nMj75R321T7KYpfwjYXZ9MHvqmTU30H+y63rah4iIzI3lT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMR\nSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEbIkO\ncOjQIWzevBlCCAghsGzZMsyYMUOLbERElCQJlb8QAs888wzeffdd5OXl4T//+Q9+8IMfYPr06bBY\n+EeFLFpPHIensgK+Zi9sTheyS+fCMalI71hx6U/bYib9ab+H2hYAhtu+hD/5WywWXL9+HQBw/fp1\nDB06lMUvkdYTx9GwcwdEZycAwNfsRcPOHQCg+8Edq/60LWbSn/Z7qG2p/+MfAAjA7w9OM8L2JdTS\niqLg1VdfxZNPPompU6fiJz/5Cdxut1bZyAQ8lRXBAz1AdHbCU1mhU6L49adtMZP+tN9DbQv8vmDx\nBxhh+xL65O/z+bBt2za8/vrrKCgowL/+9S+sWLEC+/btw6BBg6Iaw+UaHPx3Ts6QROLoStbs1S3N\nIaf7WppTsk+0XEeqt8Wsx4zWuVO535O9z8NtSyixbp/W2RMq/7Nnz6KxsREFBQUAgIKCAqSnp6Om\npgbjx4+Pagyvtw2qKpCTMwRNTdcTiaMbmbPbspzwNXtDTk/2PtF6v6dyW8x6zCQjd6r2eyr2e
bht\nCffYaPN0zW6xKN0+NMcrodM+w4cPR319PS5cuAAAqKmpgdfrxZgxYxIORuaQXToXit3ebZpitwe/\n5DKT/rQtZtKf9nuobYHVBlit3SYZYfsS+uSfk5OD559/HsuXL4eiKACA9evXIzMzU5NwZHyBL6yM\ndiVDPPrTtphJf9rv4bYl1DS9t08RQgg9A/C0j76YXR9mzW7W3ED/yW6I0z5ERGROLH8iIgmx/ImI\nJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgklfDOXVKt+6sfAzZvdpgV+K+Oz\n/55H6wdHAFUFLBY4Jk9B6+FDyc+U9DUkD7Prw6zZzZob0DG7ogBCABYLoKr8bZ+AWH7bJ1TxBwV2\nMBGRwSl2O4aVPRb1GwB/2ydc8QMsfiIyDSPcyctc5U9E1E9Ee9OXZGH5ExHpwOZ06bp+c5V/enr4\nebdvJkNEZHRGuJOXqco/77U3Qr4B2JwuDF/yBBwPTL31jTpw62qfB6amOCERUQ+BD6a3u8nmdMX0\nZW+ymOpqH6Nidn0we+qZNTfQf7LLebUPERFpguVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8R\nkYRY/kREEmL5ExFJKOGbuXR0dGD9+vX4xz/+gbS0NEyYMAFr167VIhsRESVJwuW/ceNGpKWlYf/+\n/VAUBR6PR4tcRESm13riODyVFfA1ew1zB6+AhMr/xo0b2LNnD44cOQLl9o8XZWdnaxKMiMjMWk8c\nR8POHRCdnQBu/X5/w84dAGCIN4CEzvnX1dUhMzMTW7ZsQWlpKRYtWoTTp09rlY2IyLQ8lRXB4g8w\nwh28AhL65O/3+1FXV4e7774bv/jFL/DJJ5/gRz/6Ed5//30MHhzdr851/XW6nJwhicTRFbPrg9lT\nz6y5gdRmr25pDjnd19IcVw6tsydU/rm5ubDZbJg1axYA4N5770VWVhZqa2txzz33RDUGf9JZX8yu\nD7NmN2tuIPXZbVnOkLdqtGU5Y85huJ90djqdKCwsxLFjxwAAtbW18Hq9GDt2bMLBiIjMLLt0LhS7\nvds0I9zBKyDhq31+85vfYPXq1XC73bDZbPjtb38Lh8OhRTYiItMKfKnbL6/2AYDRo0fj7bff1iIL\nEVG/4phUZJiy74n/hy8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8R\nkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVP\nRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJSLPy37JlC8aNG4fq6mqthiQi\noiSxaTHIp59+io8//hgjR47UYriYtZ44Dk9lBXzNXticLmSXzoVjUlGf8+Id8/IrbrSfPRt8bLe3\nO0UBBgwAOjsBiwVQ1eDyAIJjBubpzcxv1cyeembNDRg3e97/7NBlvQmXf2dnJ1544QW88sorKCsr\n0yJTTFpPHEfDzh0QnZ0AAF+zFw07dwTnh5sX6Q0g0pjXjv29W/H3IsSt4geC5e5r9qL+j38AIAC/\nv9s8IpJb9eOP6fIGkHD5b968GXPmzMGoUaO0yBMzT2VFsKQDRGcnPJUVwX+Hmhep/CON6Wv2xhfU\n74tvOSKiJEio/D/66CNUVVVh5cqVcY/hcg0O/jsnZ0jMy1e3NIec7gszPTAv0rriGZOIKF7RdF88\n/RhJQuV/6tQp1NTUYNq0aQCA+vp6LFmyBBs2bMD9998f1RhebxtUVSAnZwiamq7HnMGW5Qz5adyW\n5QSAsPMirSvSmHF/8iciCqOv7uvajxaL0u1Dc7wSutpn6dKlOHr0KA4ePIiDBw9i+PDh2L59e9TF\nr4Xs0rlQ7PZu0xS7HdmlcyPOi3fMgfn58QW12gCrNb5li
Yg0psnVPnoKnLuPdEVPrFf7RBrTMamo\n19U+3Zjsah8i0pdeV/soQgihy5pvS/S0jxEwuz6YPfXMmhvoP9kNcdqHiIjMieVPRCQhlj8RkYRY\n/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQh\nlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\niOVPRCQhlj8RkYRsiSzc0tKCZ555BpcvX4bdbsfYsWPxwgsvwOl0apWPiIiSIKFP/oqi4PHHH8f+\n/fvx3nvvYfTo0Xj55Ze1ykZEREmSUPlnZmaisLAw+N8TJkzAlStXEg5FRETJpQghhBYDqaqKxYsX\no7i4GGVlZVoMSURESZLQOf+u1q5di4yMDCxcuDCm5bzeNqiqQE7OEDQ1XdcqTkoxuz6YPfXMmhvo\nP9ktFgUu1+CEx9Sk/N1uNy5duoTy8nJYLLyAiIjI6BIu/02bNqGqqgpvvvkm7Ha7FpmIiCjJEir/\n8+fPY9u2bbjjjjvwyCOPAABGjRqFrVu3ahKOiIiSI6Hyv+uuu3Du3DmtshARUYrwBD0RkYRY/kRE\nEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYQ0+1VPuqX1xHF4Kivga/bC5nQh\nu3QuHJOKYl4uY/x4fHbmTMhxuj7WMngwhBAQN270uVyo9VU3ewGLBVBVzfdFslWHma6kpUF0dMDm\ndME2bCjaz50z3PaFy250Zs0NGDC7xQLH5CkYvvBRXVav2e/5x6s//aRz64njaNi5A6KzMzhPsdsx\nrOyxiG8AoZbrKTAOgD4fG2q5ruuPZn1ElBqOB6b2+QaQjJ905mkfDXkqK3oVqujshKeyIublegqM\nE81j+1p/rGMQUfK0fnBEl/XytI+GfM3emKZHOz/Wx/W1XLzjEFES6HRKkp/8NWRzumKaHu38ro+L\n9rGRxo9nDCJKEp1ugMXy11B26VwoPW5oo9jtyC6dG/NyPQXGieaxfa0/1jGIKHkck6fosl6e9tFQ\n4EvVWK/2CbVcX1ftJHK1T8/1mfVqn3DMcLUPEa/26UdX+5gRs+vDrNnNmhvoP9l5tQ8REcWN5U9E\nJCGWPxGRhFj+REQS0v1qH4tFCflvs2F2fTB76pk1N9A/smu1Dbpf7UNERKnH0z5ERBJi+RMRSYjl\nT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJKSvm73W4UFxdj3LhxqK6uDk4/dOgQ\nvvvd7+Lhhx/GnDlzcODAgajm1dbWYt68eSgpKcG8efNw8eLFZMSOmP3w4cP43ve+h9mzZ2PhwoWo\nq6uLKp+Rs7e0tOCJJ55ASUkJZs+ejWXLlqG5uTm43Mcff4w5c+agpKQEixcvhtebvHv/xrPfA7Zs\n2dJrOaNn7+jowJo1azBjxgzMnj0bv/rVr4LzjHzMAMZ4rUY6diM99/HO0zt7bW0tFi1ahJkzZ2LW\nrFl49tln0d7eHhzz4MGDmDlzJh588EGsWLECN2/e7DuISIJTp06JK1euiKlTp4pz584JIYRQVVXc\nd999wf8+e/asmDBhgvD7/RHnCSHEokWLxJ49e4QQQuzZs0csWrQoGbHDZr927ZqYOHGiuHDhQjDD\n4sWLg8tEymfk7C0tLeLEiRPB5V966SXx7LPPCiGE8Pv9Yvr06eLUqVNCCCG2bt0qVq1aZZjsAVVV\nVWLJkiXdljND9rVr14oXX3xRqKoqhBCiqakpOM/Ix4xRXqvhjt1Iz32884yQva6uTnz66afBrMuX\nLxdbtmwRQgjR1tYmioqKRG1trRBCiNWrV4vXXnutzxxJKf+AnuU/ceJEcfr0aSGEEP/85z/FjBkz\n+pzn8XhEQUGB8Pl8Q
gghfD6fKCgoEF6vN5nRu2X/5JNPxHe+853gvJaWFpGXlye8Xm/EfEbP3tNf\n//pX8eijjwaXe+ihh4LzvF6vmDBhQlJzCxFb9o6ODvH9739f1NXV9VrOyNnb2tpEQUGBaGtr6zWG\n0Y8ZI75Whfji2I303Mc7zwjZe9q+fbtYvXq1EEKIv/zlL2Lp0qXBeWfOnOn2/IWTsl/1VBQFr776\nKp588klkZGTgxo0bePPNN/ucd/XqVQwbNgxWqxUAYLVaMXToUFy9ehVOpzMl2b/85S/D4/HgzJkz\nGD9+PN57771gNiFE2HyR5hkhe9cMqqpi165dKC4uDs4fMWJEcL7T6YSqqrh27RoyMzMNkX3z5s2Y\nM2cORo0a1W05o2e3Wq3IzMzEli1bcPLkSQwaNAjLly/HfffdZ/jj3el0Gu612vXYjfTcxzsvmcdM\ntNm7Zmhvb0dFRQV+/vOfA+h9vI8YMQJXr17tc90p+8LX5/Nh27ZteP3113Ho0CG88cYbWLFiBW7c\nuBFxnhEMGTIEv/vd77BhwwaUlpbC6/XC4XAED3Ijizb72rVrkZGRgYULF+qUtLdI2T/66CNUVVVh\n/vz5escMKVJ2v9+Puro63H333aisrMTKlSvx1FNPoa2tTe/YACJnN+Jr1YjHbrRize7z+fCzn/0M\nkyZNwrRp0xJad8o++Z89exaNjY0oKCgAABQUFCA9PR01NTVQFCXsvJEjR6KhoQF+vz/4wmlsbERu\nbm6qogMAioqKUFRUBADweDzYvn07xowZg5s3b4bNJ4QwdPYAt9uNS5cuoby8HBbLrc8Dubm5uHLl\nSvAxzc3NsFgsKfvk3Ff2d955BzU1NcEXQH19PZYsWYINGzYYPnt7eztsNhtmzZoFALj33nuRlZWF\n2tpajBgxwtDHTKTXsR6v1Z7HbqTnPt55RsgOAH6/HytXrsSXvvQlPPfcc8HH5ebm4uTJk8H/vnLl\nSlT7PGWf/IcPH476+npcuHABAFBTUwOv14sxY8ZEnOdyuZCfn4+9e/cCAPbu3Yv8/PyU/Qkc0NTU\nBODWn2mbNm3CI488goyMjIj5jJ4dADZt2oSqqips3boVdrs9uMzXvvY1tLe34/Tp0wCA3bt3Y+bM\nmSnNHSn70qVLcfToURw8eBAHDx7E8OHDsX37dtx///2Gz+50OlFYWIhjx44BuHUlh9frxdixYw1/\nzBjptRrq2I303Mc7zwjZVVXFqlWrYLVa8eKLL0JRvrihy7e+9S38+9//Dl5ZtXv3bnz729/uM0NS\nbuaybt06HDhwAB6PB1lZWcjMzMS+ffvw5z//Gb///e+DwX/6059i+vTpABBxXk1NDVatWoXW1lY4\nHA643W7ceeedWseOmP2Xv/wlPvzwQ3z++ef45je/idWrVyMtLa3PfEbOfv78ecyaNQt33HEHBg4c\nCAAYNWoUtm7dCgD48MMPsWbNGnR0dGDkyJHYuHEjsrOzDZG9p+LiYpSXlyMvL88U2evq6rB69Wpc\nu3YNNpsNK1aswJQpUwAY+5gBjPFajXTsRnru452nd/bDhw/jhz/8IfLy8oJ/nX/jG9/AmjVrAAB/\n+9vfsHHjRqiqivz8fLz00kvBD3jh8E5eREQS4v/hS0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/\nEZGEWP5ERBJi+RMRSej/AZusTW/jKGeJAAAAAElFTkSuQmCC\n",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        }
+      ]
     },
     {
       "cell_type": "markdown",
@@ -1889,6 +2302,16 @@
         "- filling nans"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ofZIC0EdKJ0Y",
+        "colab_type": "text"
+      },
+      "source": [
+        "# -----current: test ready-----"
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
@@ -1937,16 +2360,258 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "uCyRxp-7qEXf",
+        "id": "AT8Osn51lD9v",
+        "colab_type": "code",
+        "outputId": "9a3af301-2c19-4bfd-faca-3dba219a270c",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 50
+        }
+      },
+      "source": [
+        "print(df_train['buildingqualitytypeid'].isnull().sum())  # rows with no quality id\n",
+        "print(df_train.shape)  # (rows, columns)\n",
+        "temp = df_train.copy()  # work on a copy so df_train stays untouched\n",
+        "temp['buildingqualitytypeid'] = temp.buildingqualitytypeid.fillna(-1)  # -1 marks a missing id"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "32911\n",
+            "(90275, 45)\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "f8rNxkrxACGe",
         "colab_type": "code",
         "colab": {}
       },
       "source": [
+        "# RESET WIRE: snapshot df_train on the first run, restore it on every later run\n",
+        "hold_df = hold_df if 'hold_df' in globals() else df_train.copy()\n",
+        "df_train = hold_df.copy()"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "OkyuebKaACxa",
+        "colab_type": "code",
+        "outputId": "d0dc876b-b02f-4179-91d0-d9a9b42e0e27",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 185
+        }
+      },
+      "source": [
+        "\n",
         "print(df_train.buildingqualitytypeid.isnull().sum())\n",
         "print(df_train.shape)\n",
         "temp=df_train.copy()\n",
         "temp['buildingqualitytypeid']=temp['buildingqualitytypeid'].fillna(-1)\n",
-        "temp=temp.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+        "print(temp.head().to_pandas())  # slice on the GPU first; only 5 rows are copied to host\n"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "32911\n",
+            "(90275, 45)\n",
+            "   parcelid  logerror  ac_id  ...  transaction_month  census_tractnumber  block_number\n",
+            "0  11827818    0.0402    NaN  ...                  3             5315.03          1013\n",
+            "1  12123024    0.0296    NaN  ...                  3             4625.00          1017\n",
+            "2  13867327    0.0344    NaN  ...                  3             0114.01          2017\n",
+            "3  12681894    0.0060    NaN  ...                  3             6513.02          1004\n",
+            "4  12848541    0.0695    1.0  ...                  3             4087.03          1018\n",
+            "\n",
+            "[5 rows x 45 columns]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "79bB7JKdAEtX",
+        "colab_type": "code",
+        "outputId": "29f38a6a-dac2-4917-8f1b-8a4b198afe67",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 118
+        }
+      },
+      "source": [
+        "print(temp.buildingqualitytypeid.head().to_pandas())  # copy only 5 values to host"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "0    7.0\n",
+            "1   -1.0\n",
+            "2   -1.0\n",
+            "3    7.0\n",
+            "4    4.0\n",
+            "Name: buildingqualitytypeid, dtype: float64\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DVgF1c_p_bN1",
+        "colab_type": "text"
+      },
+      "source": [
+        "# -----current: break-----"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "mAB9bsrPAGzQ",
+        "colab_type": "code",
+        "outputId": "2f9eaa73-a7b2-4634-e24d-9aec777b2536",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 387
+        }
+      },
+      "source": [
+        "# Goal: drop groups (e.g. building quality types) seen 3 times or fewer.\n",
+        "# value_counts() has one entry per distinct value, so using it directly as a\n",
+        "# row mask fails with 'Column size mismatch'; map each row to its value's\n",
+        "# frequency instead so the mask lines up with temp row-for-row.\n",
+        "tract_host = temp.census_tractnumber.to_pandas()\n",
+        "tract_freq = tract_host.map(tract_host.value_counts())\n",
+        "frequent_rows = (tract_freq > 3).values\n",
+        "print(temp.loc[frequent_rows].head().to_pandas())\n",
+        "\n",
+        "# temp itself is left unfiltered by the lines above\n",
+        "print(temp.buildingqualitytypeid.head().to_pandas())\n"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "RuntimeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-63-19513b5ffbd3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcensus_tractnumber\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# temp = temp.loc[]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    108\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    109\u001b[0m                 \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple_arg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    112\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple_arg\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    210\u001b[0m                 \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    211\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcolumns_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m                     \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumns_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    213\u001b[0m         \u001b[0;31m# Step 3: Gather index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    214\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# we have a single row\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m     54\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     55\u001b[0m         \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_loc_to_iloc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m     36\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     37\u001b[0m             \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     40\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    391\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 392\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    393\u001b[0m         \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    394\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    530\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    531\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_bool_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_boolean_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    533\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    534\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/columnops.py\u001b[0m in \u001b[0;36mapply_boolean_mask\u001b[0;34m(self, mask)\u001b[0m\n\u001b[1;32m    116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bool\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mapply_apply_boolean_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mcolumn_empty_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnewsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32mcudf/bindings/stream_compaction.pyx\u001b[0m in \u001b[0;36mcudf.bindings.stream_compaction.apply_apply_boolean_mask\u001b[0;34m()\u001b[0m\n",
+            "\u001b[0;31mRuntimeError\u001b[0m: cuDF failure at: /conda/conda-bld/libcudf_1566412619056/work/cpp/src/stream_compaction/apply_boolean_mask.cu:64: Column size mismatch"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "QCyed1SjAJFP",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "print(temp.head().to_pandas())  # slice on the GPU first; only 5 rows are copied to host\n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "1JgQ1Tq2NRsz",
+        "colab_type": "code",
+        "outputId": "c113cc08-3a69-4aa1-d05e-7b4d2a5df9fa",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 162
+        }
+      },
+      "source": [
+        "df_train.loc[df_train['buildingqualitytypeid'] > 3]"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "NameError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-75-25a467e8484f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;31mNameError\u001b[0m: name 'buildingqualitytypeid' is not defined"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XFkPwjUmHu4Y",
+        "colab_type": "code",
+        "outputId": "00b5fdb3-25fc-460a-bbd3-aaa421a93555",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 279
+        }
+      },
+      "source": [
+        "temp = temp.loc[temp.buildingqualitytypeid.to_pandas().pipe(lambda q: q.map(q.value_counts()) > 3).values]  # cuDF groupby has no .filter()\n"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "AttributeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-72-7111ac5c7eeb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"buildingqualitytypeid\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/groupby/groupby.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    133\u001b[0m             )\n\u001b[1;32m    134\u001b[0m         raise AttributeError(\n\u001b[0;32m--> 135\u001b[0;31m             \u001b[0;34m\"'DataFrameGroupBy' object has no attribute \"\u001b[0m \u001b[0;34m\"'{}'\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    136\u001b[0m         )\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mAttributeError\u001b[0m: 'DataFrameGroupBy' object has no attribute 'filter'"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "uCyRxp-7qEXf",
+        "colab_type": "code",
+        "outputId": "969848f0-fbc6-4388-dca2-08f8bde03990",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 380
+        }
+      },
+      "source": [
+        "\n",
         "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].replace(-1,np.nan)\n",
         "print(temp.buildingqualitytypeid.isnull().sum())\n",
         "print(temp.shape)\n",
@@ -1962,7 +2627,28 @@
         "print(df_train.buildingqualitytypeid.isnull().sum())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "32911\n",
+            "(90275, 45)\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "error",
+          "ename": "AttributeError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-44-2202aaa9de30>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"buildingqualitytypeid\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnan\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m  
    7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/groupby/groupby.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    133\u001b[0m             )\n\u001b[1;32m    134\u001b[0m         raise AttributeError(\n\u001b[0;32m--> 135\u001b[0;31m             \u001b[0;34m\"'DataFrameGroupBy' object has no attribute \"\u001b[0m \u001b[0;34m\"'{}'\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    136\u001b[0m         )\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mAttributeError\u001b[0m: 'DataFrameGroupBy' object has no attribute 'filter'"
+          ]
+        }
+      ]
     },
     {
       "cell_type": "code",

From 6936eed3c12bd592bbddddb899ffed00aca5065e Mon Sep 17 00:00:00 2001
From: Winston <winston@Winstons-MacBook-Pro.local>
Date: Wed, 4 Sep 2019 23:19:21 -0700
Subject: [PATCH 3/7] current issues labeled

---
 .../zillow_kaggle_zestimate_comp.ipynb        | 853 +++++++++---------
 1 file changed, 433 insertions(+), 420 deletions(-)

diff --git a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
index cda0658e..f05586f5 100644
--- a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
+++ b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
@@ -43,10 +43,10 @@
       "metadata": {
         "id": "W-um5d-x7o46",
         "colab_type": "code",
-        "outputId": "35d83399-515c-4172-e915-3886511baba2",
+        "outputId": "a3d473ea-3028-49fb-b769-c78616b388ae",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 302
+          "height": 312
         }
       },
       "source": [
@@ -56,20 +56,20 @@
         "# display gpu specs\n",
         "!nvidia-smi"
       ],
-      "execution_count": 0,
+      "execution_count": 1,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "Wed Aug 21 22:49:26 2019       \n",
+            "Thu Sep  5 06:04:00 2019       \n",
             "+-----------------------------------------------------------------------------+\n",
-            "| NVIDIA-SMI 430.40       Driver Version: 410.79       CUDA Version: 10.0     |\n",
+            "| NVIDIA-SMI 430.40       Driver Version: 418.67       CUDA Version: 10.1     |\n",
             "|-------------------------------+----------------------+----------------------+\n",
             "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
             "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
             "|===============================+======================+======================|\n",
             "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
-            "| N/A   49C    P8    16W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
+            "| N/A   39C    P8    10W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
             "+-------------------------------+----------------------+----------------------+\n",
             "                                                                               \n",
             "+-----------------------------------------------------------------------------+\n",
@@ -98,16 +98,16 @@
       "metadata": {
         "id": "p129YxxnihcV",
         "colab_type": "code",
-        "outputId": "a7de3ee2-b456-45d7-ab54-03eb1d72a956",
+        "outputId": "ce0d1990-45c5-4c91-d1f2-86cedd666bbc",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 1000
         }
       },
       "source": [
-        "!wget -nc https://raw.githubusercontent.com/randerzander/notebooks-contrib/master/utils/rapids-colab.sh\n",
-        "# RAPIDS 0.9 nightly\n",
-        "!bash rapids-colab.sh 0.9\n",
+        "!wget -nc https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
+        "# RAPIDS 0.10 nightly\n",
+        "!bash rapids-colab.sh \n",
         "\n",
         "import sys, os\n",
         "\n",
@@ -115,32 +115,32 @@
         "os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'\n",
         "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
       ],
-      "execution_count": 0,
+      "execution_count": 2,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "--2019-08-21 22:49:32--  https://raw.githubusercontent.com/randerzander/notebooks-contrib/master/utils/rapids-colab.sh\n",
+            "--2019-09-05 06:04:07--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
             "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
             "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 1606 (1.6K) [text/plain]\n",
+            "Length: 1609 (1.6K) [text/plain]\n",
             "Saving to: ‘rapids-colab.sh’\n",
             "\n",
             "\rrapids-colab.sh       0%[                    ]       0  --.-KB/s               \rrapids-colab.sh     100%[===================>]   1.57K  --.-KB/s    in 0s      \n",
             "\n",
-            "2019-08-21 22:49:33 (231 MB/s) - ‘rapids-colab.sh’ saved [1606/1606]\n",
+            "2019-09-05 06:04:08 (510 MB/s) - ‘rapids-colab.sh’ saved [1609/1609]\n",
             "\n",
-            "--2019-08-21 22:49:33--  https://github.com/rapidsai/notebooks-extended/raw/master/utils/env-check.py\n",
-            "Resolving github.com (github.com)... 140.82.113.3\n",
-            "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n",
+            "--2019-09-05 06:04:09--  https://github.com/rapidsai/notebooks-extended/raw/master/utils/env-check.py\n",
+            "Resolving github.com (github.com)... 13.114.40.48\n",
+            "Connecting to github.com (github.com)|13.114.40.48|:443... connected.\n",
             "HTTP request sent, awaiting response... 301 Moved Permanently\n",
             "Location: https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py [following]\n",
-            "--2019-08-21 22:49:33--  https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py\n",
+            "--2019-09-05 06:04:09--  https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py\n",
             "Reusing existing connection to github.com:443.\n",
             "HTTP request sent, awaiting response... 302 Found\n",
             "Location: https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py [following]\n",
-            "--2019-08-21 22:49:33--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py\n",
+            "--2019-09-05 06:04:10--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py\n",
             "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
             "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
@@ -149,7 +149,7 @@
             "\n",
             "env-check.py        100%[===================>]     783  --.-KB/s    in 0s      \n",
             "\n",
-            "2019-08-21 22:49:33 (125 MB/s) - ‘env-check.py’ saved [783/783]\n",
+            "2019-09-05 06:04:10 (162 MB/s) - ‘env-check.py’ saved [783/783]\n",
             "\n",
             "Checking for GPU type:\n",
             "*********************************************\n",
@@ -164,16 +164,16 @@
             "Uninstalling distributed-1.25.3:\n",
             "  Successfully uninstalled distributed-1.25.3\n",
             "Installing conda\n",
-            "--2019-08-21 22:49:38--  https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
+            "--2019-09-05 06:04:14--  https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
             "Resolving repo.continuum.io (repo.continuum.io)... 104.18.200.79, 104.18.201.79, 2606:4700::6812:c94f, ...\n",
             "Connecting to repo.continuum.io (repo.continuum.io)|104.18.200.79|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
             "Length: 58468498 (56M) [application/x-sh]\n",
             "Saving to: ‘Miniconda3-4.5.4-Linux-x86_64.sh’\n",
             "\n",
-            "Miniconda3-4.5.4-Li 100%[===================>]  55.76M   151MB/s    in 0.4s    \n",
+            "Miniconda3-4.5.4-Li 100%[===================>]  55.76M  65.1MB/s    in 0.9s    \n",
             "\n",
-            "2019-08-21 22:49:38 (151 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]\n",
+            "2019-09-05 06:04:15 (65.1 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]\n",
             "\n",
             "PREFIX=/usr/local\n",
             "installing: python-3.6.5-hc3d631a_2 ...\n",
@@ -217,7 +217,7 @@
             "    For best results, please verify that your PYTHONPATH only points to\n",
             "    directories of packages that are compatible with the Python interpreter\n",
             "    in Miniconda3: /usr/local\n",
-            "Installing RAPIDS packages\n",
+            "Installing RAPIDS 0.10 packages\n",
             "Please standby, this will take a few minutes...\n",
             "\n",
             "\n",
@@ -230,133 +230,133 @@
             "    $ conda update -n base conda\n",
             "\n",
             "\n",
-            "bzip2-1.0.8          |  396 KB | : 100% 1.0/1 [00:00<00:00,  6.99it/s]              \n",
-            "requests-2.22.0      |   84 KB | : 100% 1.0/1 [00:00<00:00,  6.56it/s]                \n",
-            "olefile-0.46         |   31 KB | : 100% 1.0/1 [00:00<00:00, 23.03it/s]\n",
-            "yaml-0.1.7           |   78 KB | : 100% 1.0/1 [00:00<00:00, 16.84it/s]\n",
-            "zlib-1.2.11          |  105 KB | : 100% 1.0/1 [00:00<00:00, 15.03it/s]\n",
-            "llvmlite-0.29.0      | 19.9 MB | : 100% 1.0/1 [00:03<00:00,  3.64s/it]               \n",
-            "pyopenssl-19.0.0     |   81 KB | : 100% 1.0/1 [00:00<00:00, 16.66it/s]\n",
-            "thrift-cpp-0.12.0    |  2.4 MB | : 100% 1.0/1 [00:00<00:00,  1.76it/s]              \n",
-            "toolz-0.10.0         |   46 KB | : 100% 1.0/1 [00:00<00:00, 17.97it/s]\n",
-            "libevent-2.1.10      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  2.23it/s]               \n",
-            "libffi-3.2.1         |   46 KB | : 100% 1.0/1 [00:00<00:00, 18.49it/s]\n",
-            "cudf-0.10.0a         |  4.8 MB | : 100% 1.0/1 [00:01<00:00,  1.50s/it]               \n",
-            "snappy-1.1.7         |   39 KB | : 100% 1.0/1 [00:00<00:00, 19.74it/s]\n",
-            "cloudpickle-1.2.1    |   22 KB | : 100% 1.0/1 [00:00<00:00, 17.29it/s]\n",
-            "re2-2019.08.01       |  420 KB | : 100% 1.0/1 [00:00<00:00,  6.36it/s]               \n",
-            "pyjwt-1.7.1          |   17 KB | : 100% 1.0/1 [00:00<00:00, 23.11it/s]\n",
-            "libstdcxx-ng-9.1.0   |  4.0 MB | : 100% 1.0/1 [00:00<00:00,  1.44it/s]               \n",
-            "libgfortran-ng-7.3.0 |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  3.53it/s]               \n",
-            "cython-0.29.13       |  2.2 MB | : 100% 1.0/1 [00:00<00:00,  1.72it/s]               \n",
-            "pyparsing-2.4.2      |   57 KB | : 100% 1.0/1 [00:00<00:00, 19.30it/s]\n",
-            "chardet-3.0.4        |  190 KB | : 100% 1.0/1 [00:00<00:00,  9.45it/s]\n",
-            "rsa-3.4.2            |   31 KB | : 100% 1.0/1 [00:00<00:00, 19.23it/s]\n",
-            "libxgboost-0.90.rapi | 33.2 MB | : 100% 1.0/1 [00:08<00:00,  8.58s/it]               \n",
-            "pyasn1-modules-0.2.6 |   47 KB | : 100% 1.0/1 [00:00<00:00, 12.11it/s]\n",
-            "lz4-c-1.8.3          |  187 KB | : 100% 1.0/1 [00:00<00:00, 12.18it/s]\n",
-            "freetype-2.10.0      |  884 KB | : 100% 1.0/1 [00:00<00:00,  4.76it/s]               \n",
-            "arrow-cpp-0.14.1     | 17.3 MB | : 100% 1.0/1 [00:03<00:00,  3.36s/it]               \n",
-            "oauthlib-3.0.1       |   82 KB | : 100% 1.0/1 [00:00<00:00, 12.63it/s]\n",
-            "libcumlprims-0.9.0   |  3.9 MB | : 100% 1.0/1 [00:01<00:00,  1.55s/it]               \n",
-            "libcugraph-0.10.0a   | 11.2 MB | : 100% 1.0/1 [00:02<00:00,  2.33s/it]               \n",
-            "dask-cuml-0.8.0a     |   30 KB | : 100% 1.0/1 [00:00<00:00,  3.87it/s]                \n",
-            "fastavro-0.22.3      |  408 KB | : 100% 1.0/1 [00:00<00:00,  6.77it/s]               \n",
-            "scipy-1.3.1          | 18.1 MB | : 100% 1.0/1 [00:03<00:00,  3.52s/it]               \n",
-            "certifi-2019.6.16    |  149 KB | : 100% 1.0/1 [00:00<00:00, 15.17it/s]\n",
-            "decorator-4.4.0      |   11 KB | : 100% 1.0/1 [00:00<00:00, 20.06it/s]\n",
-            "google-auth-1.6.3    |   45 KB | : 100% 1.0/1 [00:00<00:00, 16.56it/s]\n",
-            "parquet-cpp-1.5.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 27.43it/s]\n",
-            "rmm-0.10.0a          |   14 KB | : 100% 1.0/1 [00:00<00:00,  3.98it/s] \n",
-            "glog-0.4.0           |  104 KB | : 100% 1.0/1 [00:00<00:00, 15.00it/s]\n",
-            "wheel-0.33.6         |   35 KB | : 100% 1.0/1 [00:00<00:00, 17.29it/s]\n",
-            "bokeh-1.3.4          |  4.0 MB | : 100% 1.0/1 [00:01<00:00,  1.56s/it]              \n",
-            "scikit-learn-0.21.3  |  6.7 MB | : 100% 1.0/1 [00:01<00:00,  1.60s/it]               \n",
-            "libtiff-4.0.10       |  587 KB | : 100% 1.0/1 [00:00<00:00,  6.63it/s]               \n",
-            "idna-2.8             |  132 KB | : 100% 1.0/1 [00:00<00:00, 15.63it/s]\n",
-            "pillow-6.1.0         |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.86it/s]               \n",
-            "_libgcc_mutex-0.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 43.53it/s]\n",
-            "nccl-2.4.6.1         | 66.6 MB | : 100% 1.0/1 [00:10<00:00, 10.59s/it]              \n",
-            "pyyaml-5.1.2         |  184 KB | : 100% 1.0/1 [00:00<00:00, 10.61it/s]\n",
-            "blinker-1.4          |   13 KB | : 100% 1.0/1 [00:00<00:00, 20.08it/s]\n",
-            "librmm-0.10.0a       |   44 KB | : 100% 1.0/1 [00:00<00:00,  3.31it/s]               \n",
-            "sortedcontainers-2.1 |   25 KB | : 100% 1.0/1 [00:00<00:00, 14.67it/s]\n",
-            "cytoolz-0.10.0       |  429 KB | : 100% 1.0/1 [00:00<00:00,  7.83it/s]               \n",
-            "dask-cuda-0.10.0a    |  911 KB | : 100% 1.0/1 [00:00<00:00,  1.66it/s]               \n",
-            "libblas-3.8.0        |   10 KB | : 100% 1.0/1 [00:00<00:00,  5.23it/s] \n",
-            "distributed-2.3.0    |  366 KB | : 100% 1.0/1 [00:00<00:00,  5.36it/s]               \n",
-            "libpng-1.6.37        |  343 KB | : 100% 1.0/1 [00:00<00:00,  8.59it/s]               \n",
-            "jinja2-2.10.1        |   91 KB | : 100% 1.0/1 [00:00<00:00, 15.90it/s]\n",
-            "msgpack-python-0.6.1 |   89 KB | : 100% 1.0/1 [00:00<00:00, 17.11it/s]\n",
-            "numpy-1.17.0         |  5.2 MB | : 100% 1.0/1 [00:01<00:00,  1.30s/it]               \n",
-            "gflags-2.2.2         |  177 KB | : 100% 1.0/1 [00:00<00:00, 11.98it/s]\n",
-            "tk-8.6.9             |  3.2 MB | : 100% 1.0/1 [00:00<00:00,  1.35it/s]               \n",
-            "ca-certificates-2019 |  145 KB | : 100% 1.0/1 [00:00<00:00, 15.40it/s]\n",
-            "cffi-1.12.3          |  218 KB | : 100% 1.0/1 [00:00<00:00, 11.34it/s]\n",
-            "asn1crypto-0.24.0    |  154 KB | : 100% 1.0/1 [00:00<00:00, 11.99it/s]\n",
-            "dlpack-0.2           |   12 KB | : 100% 1.0/1 [00:00<00:00, 24.28it/s]\n",
-            "boost-cpp-1.70.0     | 21.1 MB | : 100% 1.0/1 [00:09<00:00,  9.52s/it]               \n",
-            "pyarrow-0.14.1       |  2.8 MB | : 100% 1.0/1 [00:00<00:00,  1.14it/s]              \n",
-            "markupsafe-1.1.1     |   26 KB | : 100% 1.0/1 [00:00<00:00, 21.22it/s]\n",
-            "six-1.12.0           |   22 KB | : 100% 1.0/1 [00:00<00:00, 17.89it/s]\n",
-            "python-3.6.7         | 34.6 MB | : 100% 1.0/1 [00:05<00:00,  5.94s/it]               \n",
-            "icu-64.2             | 12.6 MB | : 100% 1.0/1 [00:02<00:00,  2.19s/it]               \n",
-            "libopenblas-0.3.7    |  7.6 MB | : 100% 1.0/1 [00:01<00:00,  1.52s/it]               \n",
-            "c-ares-1.15.0        |  100 KB | : 100% 1.0/1 [00:00<00:00, 17.03it/s]\n",
-            "numba-0.45.1         |  3.1 MB | : 100% 1.0/1 [00:00<00:00,  1.00it/s]               \n",
-            "zstd-1.4.0           |  928 KB | : 100% 1.0/1 [00:00<00:00,  5.27it/s]               \n",
-            "pycparser-2.19       |  173 KB | : 100% 1.0/1 [00:00<00:00, 11.22it/s]\n",
-            "openssl-1.1.1c       |  2.1 MB | : 100% 1.0/1 [00:00<00:00,  2.22it/s]               \n",
-            "dask-cudf-0.10.0a    |   63 KB | : 100% 1.0/1 [00:00<00:00,  2.84it/s]                \n",
-            "sqlite-3.29.0        |  2.0 MB | : 100% 1.0/1 [00:00<00:00,  2.75it/s]               \n",
-            "readline-8.0         |  441 KB | : 100% 1.0/1 [00:00<00:00,  7.41it/s]               \n",
-            "tblib-1.4.0          |   12 KB | : 100% 1.0/1 [00:00<00:00, 25.51it/s]\n",
-            "locket-0.2.0         |    6 KB | : 100% 1.0/1 [00:00<00:00, 29.95it/s]\n",
-            "pyasn1-0.4.6         |   52 KB | : 100% 1.0/1 [00:00<00:00, 15.07it/s]\n",
-            "pytz-2019.2          |  228 KB | : 100% 1.0/1 [00:00<00:00,  4.22it/s]              \n",
-            "libcudf-0.10.0a      | 26.0 MB | : 100% 1.0/1 [00:05<00:00,  5.98s/it]               \n",
-            "double-conversion-3. |   85 KB | : 100% 1.0/1 [00:00<00:00, 15.44it/s]\n",
-            "fsspec-0.4.1         |   39 KB | : 100% 1.0/1 [00:00<00:00, 19.96it/s]\n",
-            "uriparser-0.9.3      |   49 KB | : 100% 1.0/1 [00:00<00:00, 19.50it/s]\n",
-            "requests-oauthlib-1. |   19 KB | : 100% 1.0/1 [00:00<00:00, 19.66it/s]\n",
-            "cryptography-2.7     |  607 KB | : 100% 1.0/1 [00:00<00:00,  3.52it/s]               \n",
-            "cachetools-2.1.0     |   10 KB | : 100% 1.0/1 [00:00<00:00, 24.47it/s]\n",
-            "ncurses-6.1          |  1.3 MB | : 100% 1.0/1 [00:01<00:00,  1.02s/it]               \n",
-            "gcsfs-0.3.0          |   19 KB | : 100% 1.0/1 [00:00<00:00, 15.81it/s]\n",
-            "libnvstrings-0.10.0a | 16.8 MB | : 100% 1.0/1 [00:07<00:00,  7.28s/it]               \n",
-            "cudatoolkit-10.0.130 | 380.0 MB | : 100% 1.0/1 [00:56<00:00, 57.00s/it]                \n",
-            "pip-19.2.2           |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  1.62it/s]               \n",
-            "liblapack-3.8.0      |   10 KB | : 100% 1.0/1 [00:00<00:00, 18.78it/s]\n",
-            "click-7.0            |   61 KB | : 100% 1.0/1 [00:00<00:00, 18.70it/s]\n",
-            "cuml-0.10.0a         |  6.0 MB | : 100% 1.0/1 [00:01<00:00,  1.69s/it]              \n",
-            "grpc-cpp-1.23.0      |  4.5 MB | : 100% 1.0/1 [00:01<00:00,  1.10s/it]              \n",
-            "dask-2.3.0           |    4 KB | : 100% 1.0/1 [00:00<00:00, 27.57it/s]\n",
-            "brotli-1.0.7         |  1.0 MB | : 100% 1.0/1 [00:00<00:00,  5.00it/s]               \n",
-            "nvstrings-0.10.0a    |  124 KB | : 100% 1.0/1 [00:00<00:00,  3.47it/s]                \n",
-            "tornado-6.0.3        |  636 KB | : 100% 1.0/1 [00:00<00:00,  4.58it/s]             \n",
-            "pynvml-8.0.2         |   30 KB | : 100% 1.0/1 [00:00<00:00, 21.55it/s]\n",
-            "libgcc-ng-9.1.0      |  8.1 MB | : 100% 1.0/1 [00:01<00:00,  1.40s/it]               \n",
-            "libcblas-3.8.0       |   10 KB | : 100% 1.0/1 [00:00<00:00, 22.83it/s]\n",
-            "joblib-0.13.2        |  180 KB | : 100% 1.0/1 [00:00<00:00,  8.76it/s]\n",
-            "pandas-0.24.2        | 11.1 MB | : 100% 1.0/1 [00:02<00:00,  2.68s/it]               \n",
-            "psutil-5.6.3         |  322 KB | : 100% 1.0/1 [00:00<00:00,  7.88it/s]               \n",
-            "heapdict-1.0.0       |    7 KB | : 100% 1.0/1 [00:00<00:00, 21.63it/s]\n",
-            "jpeg-9c              |  251 KB | : 100% 1.0/1 [00:00<00:00, 10.08it/s]\n",
-            "zict-1.0.0           |   10 KB | : 100% 1.0/1 [00:00<00:00, 20.76it/s]\n",
-            "libprotobuf-3.8.0    |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.06s/it]               \n",
-            "packaging-19.0       |   23 KB | : 100% 1.0/1 [00:00<00:00, 20.95it/s]\n",
-            "xgboost-0.90.rapidsd |   12 KB | : 100% 1.0/1 [00:00<00:00,  2.77it/s] \n",
-            "cugraph-0.10.0a      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  1.74it/s]              \n",
-            "urllib3-1.25.3       |  187 KB | : 100% 1.0/1 [00:00<00:00,  9.23it/s]\n",
-            "py-xgboost-0.90.rapi |   87 KB | : 100% 1.0/1 [00:00<00:00,  3.59it/s]                \n",
-            "dask-core-2.3.0      |  574 KB | : 100% 1.0/1 [00:00<00:00,  4.29it/s]              \n",
-            "setuptools-41.2.0    |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.25it/s]               \n",
-            "pysocks-1.7.0        |   26 KB | : 100% 1.0/1 [00:00<00:00, 21.18it/s]\n",
-            "libcuml-0.10.0a      | 29.7 MB | : 100% 1.0/1 [00:07<00:00,  7.44s/it]                \n",
-            "partd-1.0.0          |   16 KB | : 100% 1.0/1 [00:00<00:00, 21.76it/s]\n",
-            "google-auth-oauthlib |   18 KB | : 100% 1.0/1 [00:00<00:00, 23.67it/s]\n",
-            "python-dateutil-2.8. |  219 KB | : 100% 1.0/1 [00:00<00:00, 11.17it/s]\n",
-            "xz-5.2.4             |  366 KB | : 100% 1.0/1 [00:00<00:00,  7.94it/s]               \n",
+            "dask-cuda-0.10.0a    |  921 KB | : 100% 1.0/1 [00:02<00:00,  2.81s/it]               \n",
+            "jpeg-9c              |  251 KB | : 100% 1.0/1 [00:00<00:00,  8.31it/s]\n",
+            "ca-certificates-2019 |  145 KB | : 100% 1.0/1 [00:00<00:00, 10.93it/s]\n",
+            "joblib-0.13.2        |  180 KB | : 100% 1.0/1 [00:00<00:00,  7.30it/s]\n",
+            "blinker-1.4          |   13 KB | : 100% 1.0/1 [00:00<00:00, 13.37it/s]\n",
+            "dask-core-2.3.0      |  574 KB | : 100% 1.0/1 [00:00<00:00,  4.16it/s]              \n",
+            "cudf-0.10.0a         |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.74s/it]               \n",
+            "pyasn1-modules-0.2.6 |   47 KB | : 100% 1.0/1 [00:00<00:00, 10.35it/s]\n",
+            "jinja2-2.10.1        |   91 KB | : 100% 1.0/1 [00:00<00:00, 11.34it/s]\n",
+            "grpc-cpp-1.23.0      |  4.5 MB | : 100% 1.0/1 [00:01<00:00,  1.05s/it]               \n",
+            "boost-cpp-1.70.0     | 21.1 MB | : 100% 1.0/1 [00:08<00:00,  8.53s/it]               \n",
+            "idna-2.8             |  132 KB | : 100% 1.0/1 [00:00<00:00, 11.10it/s]\n",
+            "numba-0.45.1         |  3.1 MB | : 100% 1.0/1 [00:00<00:00,  1.04it/s]               \n",
+            "numpy-1.17.1         |  5.2 MB | : 100% 1.0/1 [00:01<00:00,  1.13s/it]               \n",
+            "yaml-0.1.7           |   78 KB | : 100% 1.0/1 [00:00<00:00, 12.23it/s]\n",
+            "click-7.0            |   61 KB | : 100% 1.0/1 [00:00<00:00, 12.19it/s]\n",
+            "python-dateutil-2.8. |  219 KB | : 100% 1.0/1 [00:00<00:00, 11.76it/s]\n",
+            "google-auth-1.6.3    |   45 KB | : 100% 1.0/1 [00:00<00:00, 11.44it/s]\n",
+            "gcsfs-0.3.0          |   19 KB | : 100% 1.0/1 [00:00<00:00, 15.31it/s]\n",
+            "tk-8.6.9             |  3.2 MB | : 100% 1.0/1 [00:00<00:00,  1.57it/s]               \n",
+            "pytz-2019.2          |  228 KB | : 100% 1.0/1 [00:00<00:00,  4.04it/s]              \n",
+            "pip-19.2.3           |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  1.82it/s]             \n",
+            "cachetools-2.1.0     |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.55it/s]\n",
+            "zict-1.0.0           |   10 KB | : 100% 1.0/1 [00:00<00:00, 14.96it/s]\n",
+            "cloudpickle-1.2.1    |   22 KB | : 100% 1.0/1 [00:00<00:00, 14.84it/s]\n",
+            "dask-cudf-0.10.0a    |   62 KB | : 100% 1.0/1 [00:01<00:00,  1.34s/it] \n",
+            "libcudf-0.10.0a      | 26.0 MB | : 100% 1.0/1 [00:07<00:00,  7.09s/it]               \n",
+            "pillow-6.1.0         |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.42it/s]               \n",
+            "libcumlprims-0.9.0   |  3.9 MB | : 100% 1.0/1 [00:02<00:00,  2.24s/it]                \n",
+            "cytoolz-0.10.0       |  429 KB | : 100% 1.0/1 [00:00<00:00,  6.44it/s]               \n",
+            "requests-oauthlib-1. |   19 KB | : 100% 1.0/1 [00:00<00:00, 14.79it/s]\n",
+            "six-1.12.0           |   22 KB | : 100% 1.0/1 [00:00<00:00, 13.72it/s]\n",
+            "bzip2-1.0.8          |  396 KB | : 100% 1.0/1 [00:00<00:00,  7.69it/s]               \n",
+            "llvmlite-0.29.0      | 19.9 MB | : 100% 1.0/1 [00:03<00:00,  3.15s/it]               \n",
+            "re2-2019.09.01       |  431 KB | : 100% 1.0/1 [00:00<00:00,  7.14it/s]               \n",
+            "zstd-1.4.0           |  928 KB | : 100% 1.0/1 [00:00<00:00,  4.81it/s]               \n",
+            "pycparser-2.19       |  173 KB | : 100% 1.0/1 [00:00<00:00,  9.89it/s]\n",
+            "urllib3-1.25.3       |  187 KB | : 100% 1.0/1 [00:00<00:00,  7.69it/s]\n",
+            "uriparser-0.9.3      |   49 KB | : 100% 1.0/1 [00:00<00:00, 10.99it/s]\n",
+            "gflags-2.2.2         |  177 KB | : 100% 1.0/1 [00:00<00:00,  9.88it/s]\n",
+            "libpng-1.6.37        |  343 KB | : 100% 1.0/1 [00:00<00:00,  8.51it/s]               \n",
+            "certifi-2019.6.16    |  149 KB | : 100% 1.0/1 [00:00<00:00, 12.26it/s]\n",
+            "libcblas-3.8.0       |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.55it/s]\n",
+            "_libgcc_mutex-0.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 18.61it/s]\n",
+            "psutil-5.6.3         |  322 KB | : 100% 1.0/1 [00:00<00:00,  7.35it/s]               \n",
+            "lz4-c-1.8.3          |  187 KB | : 100% 1.0/1 [00:00<00:00,  9.43it/s]\n",
+            "zlib-1.2.11          |  105 KB | : 100% 1.0/1 [00:00<00:00, 11.30it/s]\n",
+            "fsspec-0.4.4         |   39 KB | : 100% 1.0/1 [00:00<00:00, 13.92it/s]\n",
+            "thrift-cpp-0.12.0    |  2.4 MB | : 100% 1.0/1 [00:00<00:00,  2.11it/s]               \n",
+            "double-conversion-3. |   85 KB | : 100% 1.0/1 [00:00<00:00, 11.97it/s]\n",
+            "heapdict-1.0.0       |    7 KB | : 100% 1.0/1 [00:00<00:00, 16.08it/s]\n",
+            "libffi-3.2.1         |   46 KB | : 100% 1.0/1 [00:00<00:00, 13.59it/s]\n",
+            "chardet-3.0.4        |  190 KB | : 100% 1.0/1 [00:00<00:00,  8.67it/s]               \n",
+            "pynvml-8.0.3         |   30 KB | : 100% 1.0/1 [00:00<00:00,  2.88it/s]               \n",
+            "bokeh-1.3.4          |  4.0 MB | : 100% 1.0/1 [00:01<00:00,  1.30s/it]               \n",
+            "freetype-2.10.0      |  884 KB | : 100% 1.0/1 [00:00<00:00,  4.89it/s]               \n",
+            "nvstrings-0.10.0a    |  124 KB | : 100% 1.0/1 [00:01<00:00,  1.37s/it]               \n",
+            "libxgboost-0.90.rapi | 32.8 MB | : 100% 1.0/1 [00:09<00:00,  9.67s/it]               \n",
+            "pyasn1-0.4.6         |   52 KB | : 100% 1.0/1 [00:00<00:00, 12.03it/s]\n",
+            "brotli-1.0.7         |  1.0 MB | : 100% 1.0/1 [00:00<00:00,  4.92it/s]               \n",
+            "setuptools-41.2.0    |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.53it/s]               \n",
+            "wheel-0.33.6         |   35 KB | : 100% 1.0/1 [00:00<00:00, 13.26it/s]\n",
+            "libgcc-ng-9.1.0      |  8.1 MB | : 100% 1.0/1 [00:01<00:00,  1.23s/it]               \n",
+            "libcuml-0.10.0a      | 29.3 MB | : 100% 1.0/1 [00:10<00:00, 10.22s/it]               \n",
+            "dlpack-0.2           |   12 KB | : 100% 1.0/1 [00:00<00:00,  1.39it/s] \n",
+            "pandas-0.24.2        | 11.1 MB | : 100% 1.0/1 [00:02<00:00,  2.32s/it]               \n",
+            "dask-cuml-0.8.0a     |   30 KB | : 100% 1.0/1 [00:01<00:00,  1.14s/it] \n",
+            "sqlite-3.29.0        |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  2.74it/s]               \n",
+            "libgfortran-ng-7.3.0 |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  3.61it/s]               \n",
+            "toolz-0.10.0         |   46 KB | : 100% 1.0/1 [00:00<00:00, 11.64it/s]\n",
+            "asn1crypto-0.24.0    |  154 KB | : 100% 1.0/1 [00:00<00:00,  9.53it/s]\n",
+            "liblapack-3.8.0      |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.48it/s]\n",
+            "packaging-19.0       |   23 KB | : 100% 1.0/1 [00:00<00:00,  3.70it/s]               \n",
+            "cryptography-2.7     |  607 KB | : 100% 1.0/1 [00:00<00:00,  3.62it/s]               \n",
+            "olefile-0.46         |   31 KB | : 100% 1.0/1 [00:00<00:00, 15.17it/s]\n",
+            "libopenblas-0.3.7    |  7.6 MB | : 100% 1.0/1 [00:01<00:00,  1.29s/it]               \n",
+            "libtiff-4.0.10       |  587 KB | : 100% 1.0/1 [00:00<00:00,  6.35it/s]               \n",
+            "cffi-1.12.3          |  218 KB | : 100% 1.0/1 [00:00<00:00,  8.74it/s]\n",
+            "ncurses-6.1          |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  1.19it/s]               \n",
+            "rmm-0.10.0a          |   14 KB | : 100% 1.0/1 [00:00<00:00,  1.98it/s] \n",
+            "libprotobuf-3.8.0    |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.71s/it]               \n",
+            "pyopenssl-19.0.0     |   81 KB | : 100% 1.0/1 [00:00<00:00, 12.69it/s]\n",
+            "libevent-2.1.10      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  2.66it/s]              \n",
+            "librmm-0.10.0a       |   44 KB | : 100% 1.0/1 [00:00<00:00,  1.99it/s]               \n",
+            "scipy-1.3.1          | 18.1 MB | : 100% 1.0/1 [00:03<00:00,  3.18s/it]               \n",
+            "readline-8.0         |  441 KB | : 100% 1.0/1 [00:00<00:00,  6.79it/s]               \n",
+            "msgpack-python-0.6.1 |   89 KB | : 100% 1.0/1 [00:00<00:00, 13.43it/s]\n",
+            "requests-2.22.0      |   84 KB | : 100% 1.0/1 [00:00<00:00,  1.49it/s]\n",
+            "py-xgboost-0.90.rapi |   86 KB | : 100% 1.0/1 [00:00<00:00,  1.41it/s]               \n",
+            "cuml-0.10.0a         |  5.9 MB | : 100% 1.0/1 [00:02<00:00,  2.12s/it]               \n",
+            "libblas-3.8.0        |   10 KB | : 100% 1.0/1 [00:00<00:00, 13.55it/s]\n",
+            "c-ares-1.15.0        |  100 KB | : 100% 1.0/1 [00:00<00:00, 12.24it/s]\n",
+            "glog-0.4.0           |  104 KB | : 100% 1.0/1 [00:00<00:00, 12.09it/s]\n",
+            "pyarrow-0.14.1       |  2.8 MB | : 100% 1.0/1 [00:00<00:00,  1.27it/s]               \n",
+            "xz-5.2.4             |  366 KB | : 100% 1.0/1 [00:00<00:00,  7.05it/s]               \n",
+            "arrow-cpp-0.14.1     | 17.3 MB | : 100% 1.0/1 [00:02<00:00,  2.84s/it]               \n",
+            "icu-64.2             | 12.6 MB | : 100% 1.0/1 [00:01<00:00,  1.93s/it]               \n",
+            "distributed-2.3.2    |  370 KB | : 100% 1.0/1 [00:00<00:00,  5.21it/s]               \n",
+            "xgboost-0.90.rapidsd |   11 KB | : 100% 1.0/1 [00:01<00:00,  1.01s/it] \n",
+            "locket-0.2.0         |    6 KB | : 100% 1.0/1 [00:00<00:00, 15.02it/s]\n",
+            "snappy-1.1.7         |   39 KB | : 100% 1.0/1 [00:00<00:00, 14.68it/s]\n",
+            "pyjwt-1.7.1          |   17 KB | : 100% 1.0/1 [00:00<00:00, 13.20it/s]\n",
+            "libstdcxx-ng-9.1.0   |  4.0 MB | : 100% 1.0/1 [00:00<00:00,  1.61it/s]               \n",
+            "pysocks-1.7.0        |   26 KB | : 100% 1.0/1 [00:00<00:00, 15.49it/s]\n",
+            "dask-2.3.0           |    4 KB | : 100% 1.0/1 [00:00<00:00, 15.11it/s]\n",
+            "sortedcontainers-2.1 |   25 KB | : 100% 1.0/1 [00:00<00:00, 14.59it/s]\n",
+            "parquet-cpp-1.5.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 14.74it/s]\n",
+            "nccl-2.4.6.1         | 66.6 MB | : 100% 1.0/1 [00:12<00:00, 12.58s/it]               \n",
+            "google-auth-oauthlib |   18 KB | : 100% 1.0/1 [00:00<00:00, 13.18it/s]\n",
+            "cugraph-0.10.0a      |  1.3 MB | : 100% 1.0/1 [00:10<00:00,  3.84s/it]              \n",
+            "libcugraph-0.10.0a   | 11.3 MB | : 100% 1.0/1 [00:18<00:00, 18.20s/it]               \n",
+            "python-3.6.7         | 34.6 MB | : 100% 1.0/1 [00:05<00:00,  5.00s/it]               \n",
+            "openssl-1.1.1c       |  2.1 MB | : 100% 1.0/1 [00:00<00:00,  2.56it/s]               \n",
+            "tornado-6.0.3        |  636 KB | : 100% 1.0/1 [00:00<00:00,  4.46it/s]              \n",
+            "partd-1.0.0          |   16 KB | : 100% 1.0/1 [00:00<00:00, 13.42it/s]\n",
+            "markupsafe-1.1.1     |   26 KB | : 100% 1.0/1 [00:00<00:00, 14.08it/s]\n",
+            "fastavro-0.22.4      |  405 KB | : 100% 1.0/1 [00:00<00:00,  7.21it/s]               \n",
+            "cython-0.29.13       |  2.2 MB | : 100% 1.0/1 [00:00<00:00,  1.86it/s]               \n",
+            "rsa-3.4.2            |   31 KB | : 100% 1.0/1 [00:00<00:00, 13.62it/s]\n",
+            "pyyaml-5.1.2         |  184 KB | : 100% 1.0/1 [00:00<00:00, 10.32it/s]\n",
+            "scikit-learn-0.21.3  |  6.7 MB | : 100% 1.0/1 [00:01<00:00,  1.44s/it]               \n",
+            "decorator-4.4.0      |   11 KB | : 100% 1.0/1 [00:00<00:00, 15.90it/s]\n",
+            "oauthlib-3.0.1       |   82 KB | : 100% 1.0/1 [00:00<00:00,  9.79it/s]\n",
+            "pyparsing-2.4.2      |   57 KB | : 100% 1.0/1 [00:00<00:00, 13.40it/s]\n",
+            "tblib-1.4.0          |   12 KB | : 100% 1.0/1 [00:00<00:00, 15.25it/s]\n",
+            "cudatoolkit-10.0.130 | 380.0 MB | : 100% 1.0/1 [00:46<00:00, 46.86s/it]                \n",
+            "libnvstrings-0.10.0a | 24.8 MB | : 100% 1.0/1 [00:07<00:00,  7.82s/it]               \n",
             "Copying shared object files to /usr/lib\n",
             "\n",
             "*********************************************\n",
@@ -386,10 +386,10 @@
       "metadata": {
         "id": "x1dLRTm168Tk",
         "colab_type": "code",
-        "outputId": "e4ee4a4e-64f3-4e87-8b87-472b02f84325",
+        "outputId": "406a519a-e019-46cf-f0bf-7bba3dd2bb79",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 958
+          "height": 1000
         }
       },
       "source": [
@@ -397,7 +397,7 @@
         "!pip install kaggle\n",
         "!mkdir /root/.kaggle\n",
         "# plug api -- get your own API key\n",
-        "!echo '{\"username\":\"warobson\",\"key\":\"\"}' > /root/.kaggle/kaggle.json\n",
+        "!echo '{\"username\":\"warobson\",\"key\":\"YOUR_KAGGLE_API_KEY\"}' > /root/.kaggle/kaggle.json\n",
         "!chmod 600 /root/.kaggle/kaggle.json\n",
         "# !kaggle datasets download\n",
         "!kaggle competitions download -c zillow-prize-1\n",
@@ -409,44 +409,44 @@
         "!unzip -q \"/content/train_2017.csv.zip\"\n",
         "!unzip -q \"/content/properties_2017.csv.zip\""
       ],
-      "execution_count": 0,
+      "execution_count": 3,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
             "Collecting kaggle\n",
             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e9/fc/0de659ea1f2096563204925b6660ae141f3d85bbe9e8a1571c3eb6cc1fdd/kaggle-1.5.5.tar.gz (56kB)\n",
-            "\u001b[K     |████████████████████████████████| 61kB 2.9MB/s \n",
+            "\u001b[K     |████████████████████████████████| 61kB 31.1MB/s \n",
             "\u001b[?25hCollecting urllib3<1.25,>=1.21.1 (from kaggle)\n",
             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/01/11/525b02e4acc0c747de8b6ccdab376331597c569c42ea66ab0a1dbd36eca2/urllib3-1.24.3-py2.py3-none-any.whl (118kB)\n",
-            "\u001b[K     |████████████████████████████████| 122kB 9.7MB/s \n",
+            "\u001b[K     |████████████████████████████████| 122kB 35.9MB/s \n",
             "\u001b[?25hRequirement already satisfied: six>=1.10 in /usr/local/lib/python3.6/site-packages (from kaggle) (1.12.0)\n",
             "Requirement already satisfied: certifi in /usr/local/lib/python3.6/site-packages (from kaggle) (2019.6.16)\n",
             "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/site-packages (from kaggle) (2.8.0)\n",
             "Requirement already satisfied: requests in /usr/local/lib/python3.6/site-packages (from kaggle) (2.22.0)\n",
             "Collecting tqdm (from kaggle)\n",
-            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/a5/83/06029af22fe06b8a7be013aeae5e104b3ed26867e5d4ca91408b30aa602e/tqdm-4.34.0-py2.py3-none-any.whl (50kB)\n",
-            "\u001b[K     |████████████████████████████████| 51kB 12.9MB/s \n",
+            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/dc/88/d3213e2f3492daf09d8b41631ad6899f56db17ce83ea9c8a579902bafe5e/tqdm-4.35.0-py2.py3-none-any.whl (50kB)\n",
+            "\u001b[K     |████████████████████████████████| 51kB 29.7MB/s \n",
             "\u001b[?25hCollecting python-slugify (from kaggle)\n",
             "  Downloading https://files.pythonhosted.org/packages/a2/5d/bd30413c00bbed3945558aca07c55944073e1e30abeee1f06515281f9811/python-slugify-3.0.3.tar.gz\n",
             "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (2.8)\n",
             "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (3.0.4)\n",
             "Collecting text-unidecode==1.2 (from python-slugify->kaggle)\n",
             "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/79/42/d717cc2b4520fb09e45b344b1b0b4e81aa672001dd128c180fabc655c341/text_unidecode-1.2-py2.py3-none-any.whl (77kB)\n",
-            "\u001b[K     |████████████████████████████████| 81kB 28.8MB/s \n",
+            "\u001b[K     |████████████████████████████████| 81kB 32.1MB/s \n",
             "\u001b[?25hBuilding wheels for collected packages: kaggle, python-slugify\n",
             "  Building wheel for kaggle (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for kaggle: filename=kaggle-1.5.5-cp36-none-any.whl size=71896 sha256=ee79b8c43069539b819caedf251aae4360d5dd43aec6a5bc2734275442177e60\n",
+            "  Created wheel for kaggle: filename=kaggle-1.5.5-cp36-none-any.whl size=71896 sha256=d9815b0d9eae6d3594e8dc1a57a33174b1dbe24f623e1d688a92a2588f4e1be0\n",
             "  Stored in directory: /root/.cache/pip/wheels/db/6a/80/6cd1892eb9b9b136333db3c74e16cba4e17e2c700f51541f06\n",
             "  Building wheel for python-slugify (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for python-slugify: filename=python_slugify-3.0.3-py2.py3-none-any.whl size=4789 sha256=a8f8df8b4a56a8db4fc841f6b6ff5f89a9a3c7e641ff4fc8c41d5e7a5c1ec087\n",
+            "  Created wheel for python-slugify: filename=python_slugify-3.0.3-py2.py3-none-any.whl size=4789 sha256=ccca227a48fbd1c2f5ba45701b0b52f1a12b7d1484ba459889128b3712c17b88\n",
             "  Stored in directory: /root/.cache/pip/wheels/0f/96/ca/85f5b01165975402d1e37f8dd346df00dc39be1d0761bd17bb\n",
             "Successfully built kaggle python-slugify\n",
             "Installing collected packages: urllib3, tqdm, text-unidecode, python-slugify, kaggle\n",
             "  Found existing installation: urllib3 1.25.3\n",
             "    Uninstalling urllib3-1.25.3:\n",
             "      Successfully uninstalled urllib3-1.25.3\n",
-            "Successfully installed kaggle-1.5.5 python-slugify-3.0.3 text-unidecode-1.2 tqdm-4.34.0 urllib3-1.24.3\n"
+            "Successfully installed kaggle-1.5.5 python-slugify-3.0.3 text-unidecode-1.2 tqdm-4.35.0 urllib3-1.24.3\n"
           ],
           "name": "stdout"
         },
@@ -469,23 +469,23 @@
           "output_type": "stream",
           "text": [
             "Downloading sample_submission.csv.zip to /content\n",
-            " 91% 9.00M/9.86M [00:00<00:00, 17.1MB/s]\n",
-            "100% 9.86M/9.86M [00:00<00:00, 22.0MB/s]\n",
+            " 51% 5.00M/9.86M [00:00<00:00, 15.9MB/s]\n",
+            "100% 9.86M/9.86M [00:00<00:00, 29.2MB/s]\n",
             "Downloading properties_2016.csv.zip to /content\n",
-            " 98% 156M/159M [00:01<00:00, 103MB/s] \n",
-            "100% 159M/159M [00:01<00:00, 92.1MB/s]\n",
+            " 91% 145M/159M [00:02<00:00, 54.3MB/s]\n",
+            "100% 159M/159M [00:02<00:00, 59.1MB/s]\n",
             "Downloading zillow_data_dictionary.xlsx.zip to /content\n",
             "  0% 0.00/15.7k [00:00<?, ?B/s]\n",
-            "100% 15.7k/15.7k [00:00<00:00, 21.5MB/s]\n",
+            "100% 15.7k/15.7k [00:00<00:00, 14.5MB/s]\n",
             "Downloading train_2016_v2.csv.zip to /content\n",
             "  0% 0.00/632k [00:00<?, ?B/s]\n",
-            "100% 632k/632k [00:00<00:00, 164MB/s]\n",
+            "100% 632k/632k [00:00<00:00, 212MB/s]\n",
             "Downloading train_2017.csv.zip to /content\n",
             "  0% 0.00/825k [00:00<?, ?B/s]\n",
-            "100% 825k/825k [00:00<00:00, 198MB/s]\n",
+            "100% 825k/825k [00:00<00:00, 225MB/s]\n",
             "Downloading properties_2017.csv.zip to /content\n",
-            " 93% 129M/138M [00:01<00:00, 70.7MB/s]\n",
-            "100% 138M/138M [00:01<00:00, 87.7MB/s]\n"
+            " 93% 129M/138M [00:05<00:00, 20.1MB/s]\n",
+            "100% 138M/138M [00:05<00:00, 26.7MB/s]\n"
           ],
           "name": "stdout"
         }
@@ -507,17 +507,17 @@
       "metadata": {
         "id": "6n75DyJ-dm4B",
         "colab_type": "code",
-        "outputId": "b0cc7ddd-7667-475b-ca2f-d3ae6b580331",
+        "outputId": "0b450180-fedb-4251-f106-08dad9ee50f1",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 168
+          "height": 173
         }
       },
       "source": [
         "# display content folder contents\n",
         "!ls \"/content/\""
       ],
-      "execution_count": 0,
+      "execution_count": 4,
       "outputs": [
         {
           "output_type": "stream",
@@ -592,10 +592,10 @@
       "metadata": {
         "id": "uynoUxpx8Xsn",
         "colab_type": "code",
-        "outputId": "80e4f89a-4c16-41a2-dffe-2db4a7dddbd8",
+        "outputId": "8b688642-b7d0-4785-b428-4c5130419e17",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 118
+          "height": 121
         }
       },
       "source": [
@@ -605,7 +605,7 @@
         "# peek display 2016 train\n",
         "print(train2016.head())"
       ],
-      "execution_count": 0,
+      "execution_count": 6,
       "outputs": [
         {
           "output_type": "stream",
@@ -626,10 +626,10 @@
       "metadata": {
         "id": "2EfApIzCfEtr",
         "colab_type": "code",
-        "outputId": "7e91f5f7-7b76-410a-b700-0380b29bd982",
+        "outputId": "fc64b5dd-72a4-44d2-bfe4-2d5a6760a381",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 151
+          "height": 156
         }
       },
       "source": [
@@ -638,7 +638,7 @@
         "# peek display 2016 properties\n",
         "print(prop2016.head())"
       ],
-      "execution_count": 0,
+      "execution_count": 7,
       "outputs": [
         {
           "output_type": "stream",
@@ -679,10 +679,10 @@
       "metadata": {
         "id": "o4CvSIcwm4B2",
         "colab_type": "code",
-        "outputId": "327cc4dd-bad3-40f2-9d09-41105b532abb",
+        "outputId": "8857aa84-5f36-4e74-8600-afc8d29ef16a",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 151
+          "height": 156
         }
       },
       "source": [
@@ -733,7 +733,7 @@
         "# what's the data frame look like?\n",
         "print(df_train.head())"
       ],
-      "execution_count": 0,
+      "execution_count": 8,
       "outputs": [
         {
           "output_type": "stream",
@@ -804,10 +804,10 @@
       "metadata": {
         "id": "B3-1V93smA9A",
         "colab_type": "code",
-        "outputId": "28a73c5c-abf2-4325-a575-b654c9ddd9f4",
+        "outputId": "61e3186a-d6f6-4b52-8459-a1052ccb35d3",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 67
+          "height": 69
         }
       },
       "source": [
@@ -820,7 +820,7 @@
         "\n",
         "print(df_train.just_hottub_or_spa.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 10,
       "outputs": [
         {
           "output_type": "stream",
@@ -885,10 +885,10 @@
       "metadata": {
         "id": "FBgs7zJm3qk-",
         "colab_type": "code",
-        "outputId": "3c3935ec-9d5e-4806-c701-1191f563ccdd",
+        "outputId": "93f2d4bf-264e-4f92-ddcb-5cda84c0e80e",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 67
+          "height": 69
         }
       },
       "source": [
@@ -900,7 +900,7 @@
         "\n",
         "print(df_train.just_hottub_or_spa.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 12,
       "outputs": [
         {
           "output_type": "stream",
@@ -985,17 +985,17 @@
       "metadata": {
         "id": "OZM6lXmmpj5k",
         "colab_type": "code",
-        "outputId": "1d5124b4-31fa-43ae-ae0b-712ac79fde3b",
+        "outputId": "989a44db-4064-4d82-e217-6c1b409871a4",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 50
+          "height": 52
         }
       },
       "source": [
         "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
         "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
-      "execution_count": 0,
+      "execution_count": 15,
       "outputs": [
         {
           "output_type": "stream",
@@ -1025,10 +1025,10 @@
       "metadata": {
         "id": "i3YRZgU_qZhA",
         "colab_type": "code",
-        "outputId": "a6231c9e-37cd-4766-9743-c85f3aa61654",
+        "outputId": "94e69d14-8a62-424f-e0ad-4598cec8bf49",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 50
+          "height": 52
         }
       },
       "source": [
@@ -1049,7 +1049,7 @@
         "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
         "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
-      "execution_count": 0,
+      "execution_count": 16,
       "outputs": [
         {
           "output_type": "stream",
@@ -1106,10 +1106,10 @@
       "metadata": {
         "id": "gbbUIbwJ-ouS",
         "colab_type": "code",
-        "outputId": "115cac03-580c-477e-b5c3-0d191c333b2d",
+        "outputId": "aa91093c-b914-4a44-ebf9-6ff93d5ab5e9",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 118
+          "height": 121
         }
       },
       "source": [
@@ -1117,7 +1117,7 @@
         "conditions = (df_train.garagecarcnt > 0) & (df_train.garage_sqft == 0)\n",
         "print(df_train.loc[conditions][garage].head())"
       ],
-      "execution_count": 0,
+      "execution_count": 18,
       "outputs": [
         {
           "output_type": "stream",
@@ -1125,9 +1125,9 @@
             "    garagecarcnt  garage_sqft\n",
             "16           2.0          0.0\n",
             "29           1.0          0.0\n",
-            "32           1.0          0.0\n",
-            "35           1.0          0.0\n",
-            "36           2.0          0.0\n"
+            "36           2.0          0.0\n",
+            "54           2.0          0.0\n",
+            "65           1.0          0.0\n"
           ],
           "name": "stdout"
         }
@@ -1244,10 +1244,10 @@
       "metadata": {
         "id": "yHZH4rMNLfBA",
         "colab_type": "code",
-        "outputId": "6ba5f661-caa5-44b8-b492-b9f5708181db",
+        "outputId": "53844d43-16cb-41f2-8684-5268848f1476",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 202
+          "height": 208
         }
       },
       "source": [
@@ -1303,7 +1303,7 @@
         "# let's see how out unit counts look\n",
         "print(df_train.unitcnt.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 22,
       "outputs": [
         {
           "output_type": "stream",
@@ -1396,10 +1396,10 @@
       "metadata": {
         "id": "8lYcO_T5XKNN",
         "colab_type": "code",
-        "outputId": "2440dccb-bc7d-459c-ae1a-cc31388be45e",
+        "outputId": "e7ff645b-ac87-4039-d135-db8fd49855da",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 303
+          "height": 311
         }
       },
       "source": [
@@ -1415,7 +1415,7 @@
           "traceback": [
             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
             "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-28-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m<ipython-input-84-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
             "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1141\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1143\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1145\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
             "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    717\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m         ):\n\u001b[0;32m--> 719\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    720\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
             "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
@@ -1449,10 +1449,10 @@
       "metadata": {
         "id": "Svp6J0cJ5dL0",
         "colab_type": "code",
-        "outputId": "352d2f36-658f-4698-bfdb-5c748b67f0d7",
+        "outputId": "fd9373e8-8a1f-45a3-a7bd-387f0e678d2c",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 67
+          "height": 69
         }
       },
       "source": [
@@ -1471,7 +1471,7 @@
         "# display values in tax delinquency flag column\n",
         "print(df_train['taxdelinquencyflag'].value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 24,
       "outputs": [
         {
           "output_type": "stream",
@@ -1501,16 +1501,16 @@
       "metadata": {
         "id": "lHh95mAIMrMy",
         "colab_type": "code",
-        "outputId": "244b62b2-299c-4440-83d2-b5545712ba3e",
+        "outputId": "37c584b1-76a4-4df7-ca71-23a9d50f165a",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 67
+          "height": 69
         }
       },
       "source": [
         "print(df_train.taxdelinquencyflag.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 25,
       "outputs": [
         {
           "output_type": "stream",
@@ -1528,10 +1528,10 @@
       "metadata": {
         "id": "6Bic66I9LfGC",
         "colab_type": "code",
-        "outputId": "4311fb13-6d49-44e1-83ef-73e27d4720c4",
+        "outputId": "af959592-7a80-42ee-8fda-3e18d6b9c514",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 235
+          "height": 243
         }
       },
       "source": [
@@ -1552,7 +1552,7 @@
         "# what've we got? \n",
         "print(df_train.taxdelinquencyyear.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 26,
       "outputs": [
         {
           "output_type": "stream",
@@ -1608,10 +1608,10 @@
       "metadata": {
         "id": "Sg0eN-K1QdZy",
         "colab_type": "code",
-        "outputId": "0e6ca58c-3b13-4c9e-c902-4d8a9c98a855",
+        "outputId": "3cdd2ef6-fb68-46bd-a611-5dd3d9ff45d2",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 474
+          "height": 489
         }
       },
       "source": [
@@ -1671,7 +1671,7 @@
         "\"\"\"\n",
         "print(df_train[['census_tractnumber', 'block_number']].head())"
       ],
-      "execution_count": 0,
+      "execution_count": 29,
       "outputs": [
         {
           "output_type": "stream",
@@ -1726,10 +1726,10 @@
       "metadata": {
         "id": "xhCosNpXvTVU",
         "colab_type": "code",
-        "outputId": "b8ca9fb3-6c67-4466-d7cc-98ff52504659",
+        "outputId": "3f70a009-a211-46fd-ecc9-731be3d15fe1",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 84
+          "height": 86
         }
       },
       "source": [
@@ -1749,7 +1749,7 @@
         "# drop columns with more than 95% null values\n",
         "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
       ],
-      "execution_count": 0,
+      "execution_count": 30,
       "outputs": [
         {
           "output_type": "stream",
@@ -1792,10 +1792,10 @@
       "metadata": {
         "id": "yB2lzAyopS_S",
         "colab_type": "code",
-        "outputId": "2860febf-c7ad-4823-d170-2633c4be8ae5",
+        "outputId": "06922e76-c61b-4212-afc5-4f3a51eaaa09",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 218
+          "height": 225
         }
       },
       "source": [
@@ -1804,7 +1804,7 @@
         "# let's see what we've got\n",
         "print(df_train['unitcnt'].value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 31,
       "outputs": [
         {
           "output_type": "stream",
@@ -1843,7 +1843,7 @@
       "metadata": {
         "id": "-icFDeLSoJwl",
         "colab_type": "code",
-        "outputId": "5ea8e799-3105-4601-82d4-54bd00c5056b",
+        "outputId": "03d9a89b-6e21-4bba-ae75-aa229c744dcf",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1862,7 +1862,7 @@
         "\n",
         "print(df_train.pool_sqft.isna().sum())"
       ],
-      "execution_count": 0,
+      "execution_count": 32,
       "outputs": [
         {
           "output_type": "stream",
@@ -1890,10 +1890,10 @@
       "metadata": {
         "id": "3pVABkZTYK9F",
         "colab_type": "code",
-        "outputId": "345e4225-6a09-4fae-efb3-c9abe56c622a",
+        "outputId": "e926b021-7cbb-4acd-96c8-2a15fbe1afd7",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 84
+          "height": 86
         }
       },
       "source": [
@@ -1920,7 +1920,7 @@
         "print(df_train.total_parcel_tax.isnull().sum())\n",
         "print(df_train.land_tax.isnull().sum())"
       ],
-      "execution_count": 0,
+      "execution_count": 33,
       "outputs": [
         {
           "output_type": "stream",
@@ -1939,7 +1939,7 @@
       "metadata": {
         "id": "8SID48LOpYvu",
         "colab_type": "code",
-        "outputId": "1d369c4a-759e-4331-b5fe-6c784ae66897",
+        "outputId": "842c0ccb-1710-4e73-f85a-599d5b27988b",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1950,7 +1950,7 @@
         "df_train = df_train.drop(['regionidcounty'], axis=1)\n",
         "df_train.shape"
       ],
-      "execution_count": 0,
+      "execution_count": 34,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1962,7 +1962,7 @@
           "metadata": {
             "tags": []
           },
-          "execution_count": 51
+          "execution_count": 34
         }
       ]
     },
@@ -1971,7 +1971,7 @@
       "metadata": {
         "id": "tWmM2J8_pkg1",
         "colab_type": "code",
-        "outputId": "44689c09-a426-48c9-eae8-7e81af63080e",
+        "outputId": "e544a196-1e32-4d18-a6b5-98c49a57589e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1984,7 +1984,7 @@
         "df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0] = np.nan\n",
         "print(df_train.bedroomcnt.isnull().sum())"
       ],
-      "execution_count": 0,
+      "execution_count": 35,
       "outputs": [
         {
           "output_type": "stream",
@@ -2000,10 +2000,10 @@
       "metadata": {
         "id": "3qnP2L9LpmeJ",
         "colab_type": "code",
-        "outputId": "a4e9550d-5ea8-4066-d3f3-ea73bfe04cef",
+        "outputId": "2e863b26-9267-45f2-f31f-3305f5577ce3",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 101
+          "height": 104
         }
       },
       "source": [
@@ -2040,7 +2040,7 @@
         "print(df_train.bedroomcnt.isnull().sum())\n",
         "print(df_train.roomcnt.isnull().sum())"
       ],
-      "execution_count": 0,
+      "execution_count": 36,
       "outputs": [
         {
           "output_type": "stream",
@@ -2074,10 +2074,10 @@
       "metadata": {
         "id": "IW4CG2InpolD",
         "colab_type": "code",
-        "outputId": "47e46700-fe9c-4b98-9941-014ee6dea441",
+        "outputId": "288444a4-d153-4624-c961-b3956092d87e",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 252
+          "height": 260
         }
       },
       "source": [
@@ -2129,7 +2129,7 @@
         "print(f'AFTER\\n{df_train.numberofstories.value_counts()}\\n'\n",
         "      f'{df_train.numberofstories.isnull().sum()} remaining null values')"
       ],
-      "execution_count": 0,
+      "execution_count": 37,
       "outputs": [
         {
           "output_type": "stream",
@@ -2158,10 +2158,10 @@
       "metadata": {
         "id": "AHcMsDCxprd4",
         "colab_type": "code",
-        "outputId": "3a327d21-4675-41ce-aa9e-f52ae86eb491",
+        "outputId": "516954f4-d1d9-4876-e3a4-4545d865d9f6",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 286
+          "height": 295
         }
       },
       "source": [
@@ -2190,7 +2190,7 @@
         "print(f'AFTER\\n{df_train.fireplace_count.value_counts()}\\n'\n",
         "      f'{df_train.fireplace_count.isnull().sum()} remaining null values')"
       ],
-      "execution_count": 0,
+      "execution_count": 38,
       "outputs": [
         {
           "output_type": "stream",
@@ -2221,14 +2221,13 @@
       "metadata": {
         "id": "FIuSWoJspt3H",
         "colab_type": "code",
-        "outputId": "9c5daebd-4b2a-461b-8490-350d19fa7ba8",
+        "outputId": "d8b6ef02-d214-4530-ce3a-c6efc5bd01cf",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 317
         }
       },
       "source": [
-        "\n",
         "# set basic sns \n",
         "color = sns.color_palette()\n",
         "sns.set(style=\"darkgrid\")\n",
@@ -2242,7 +2241,7 @@
         "# display the graph\n",
         "plt.show()"
       ],
-      "execution_count": 0,
+      "execution_count": 41,
       "outputs": [
         {
           "output_type": "display_data",
@@ -2263,7 +2262,7 @@
       "metadata": {
         "id": "KOHPCFRSp5y9",
         "colab_type": "code",
-        "outputId": "3aa099cd-791f-4a5a-9ea7-29168fc239b9",
+        "outputId": "471d6f7c-607a-4520-d219-3ab56500c004",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 274
@@ -2275,12 +2274,12 @@
         "# display the graph\n",
         "plt.show()"
       ],
-      "execution_count": 0,
+      "execution_count": 42,
       "outputs": [
         {
           "output_type": "display_data",
           "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGx1JREFUeJzt3WtwE+ehBuB3JSFjA6ot2YC5ppnG\n1JmG0DqDGTeFYAimDZDWzJSUi5OBhLYpKbRhUkLTkgYCUUlomEBi0kNLSDLwxx7aQE8hHS4pUCg0\nF+oMxdQY8Az4ItkcY4LtSPudHyDFF0nWZaXd9fc+v8iu9tt3V6tX8mqjVYQQAkREJBWL3gGIiCj1\nWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEbHoHaGm5AVUVcLkGw+tt0ztOXJhdH8yeembNDfSf7BaLgqysQQmPqXv5q6qAqorgv82K\n2fXB7Kln1twAs3fF0z5ERBJi+RMRSYjlT0QkoajK3+12o7i4GOPGjUN1dTUAoKWlBU888QRKSkow\ne/ZsLFu2DM3NzUkNS0RE2ojqC99p06ahrKwMCxYsCE5TFAWPP/44CgsLAdx6g3j55Zexfv365CSl\npGs9cRyeygr4mr2AxQKoKmxOF7JL58Ixqaj3YwIsFjgmT8HwhY9GHLPnWPFmC4yTM7sk7m1NRLTb\nFHJf3VYdYtzAWNeO/R3tZ8+GnNfXemxOFzLGj8dnZ850ywcg7HPbdV5g2mf/PY/WD44Aqtrr+a1/\n560v5vWkKEDP24LfXp8yaBAURYHa1hacFjOLBQPHjYOvofHWtqSlAZ2d3dYZbj/Wj78HnZ/7eu1b\nTVitgN/fLWe414RRKLHcwL24uBjl5eXIy8vrNW///v3YtWsXduzYEVMAr7cNqiqQkzMETU3XY1rW\nKPpD9tYTx9GwcwdEZ2evxyh2O4aVPQYAYR8DAI4HpnY72EONGRgrljeAcON8ZdmPodz99ajH0UK0\n2xRpf0YUqjwTXY/VCkAB/L7o5oXJ4HhgKtLT7Wj43/1RboyOIuzHVOr5mohX146xWBS4XIMTHlOT\nc/6qqmLXrl0oLi7WYjjSgaeyImyBiM5OeCorIj4GwK1Pg32MGRgr0WyisxOX3343pnG0EO029bWv\nwopQWHGvx+8PXfzh5oXJ0PrBETTsfz/yuozCAMUP9H5NGIkm1/mvXbsWGRkZWLhwYczLdn0Hy8kZ\nokUcXZg9e3VL5O9rfH3MBwCoarf9EG5MX0tzTPsr3DgdHm/K93u029TX/oxXqtYTUjynaWTX4zWR\nCK2P9YTL3+1249KlSygvL4fFEvsfEjzto69AdluWM+S56QBblhMAIj4GFku3/RBuTFuWM6b9FW6c\ntGxXyvd7tNvU1/5MZP2pWE9Igdc33wSi1+M1ES/DnfbZtGkTqqqqsHXrVtjt9oTDkH6yS+dCCfMc\nKnY7skvnRnwMADgmT+lzzMBYiWZT7HaMWbQgzBLJE+029bWvwlKU8LPiXY/VCljDfM4LNS9MBsfk\nKRhW8mDkdRlFhP2YSj1fE0Ziff7555/v60Hr1q3Dc889h8bGRuzfvx+VlZWYOHEinn76aQwYMAB7\n9uzB7t27cfToUTz00EMxBbh5sxNCAIMGpeGzz+I4R2oA/SF72qjRGOByof3iRag3b976lCcEbE4X\nhj4yH45JRb0fE2CxwDHlgV5fbPV8fNexYhFunNEzpqV8v0e7TWH3VQQ2pwtDFyyE7+Zn8Hk8vedF\nsR6b04XBhZPgb73+Rb75CzD4618P/dz2mBfIYHE40HH58q1z512e39FTivB/9U1fzOspVOneXp8y\naBAsaWm3vqO4PS1mFgsGfvW
rgCpubUtaWq+/RMLtR8f4e6A4Xb32rSas1u7bE+Y1Ea+uHaMoCjIy\nEv+wHdPVPsnA0z76YnZ9mDW7WXMD/Se7IU77EBGRObH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgk\nxPInIpIQy5+ISEIsfyIiCbH8iYgkpMlPOhMRxUqru7xRfFj+RJRyPe9A5mv2omHnDgDgG0CK8LQP\nEaWcVnd5o/ix/Iko5cLdgCZlN6Yhlj8RpZ7N6YppOmmP5U9EKafVXd4ofvzCl4hSLvClLq/20Q/L\nn4h04ZhUxLLXEU/7EBFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\nqM/yd7vdKC4uxrhx41BdXR2cXltbi3nz5qGkpATz5s3DxYsXk5mTiIg01OfPO0ybNg1lZWVYsGBB\nt+lr1qzB/Pnz8fDDD+NPf/oTfv3rX2Pnzp1JC6q3+nfeQusHRwBVBSwWOCZPQcZX7oKnsgLVzV7A\nYgFUNam/UdL1zkdBigIMGAB0dkaVoecY1b0ekTqBnPVv/RH4/POYlw+XfWB+PsY8/Yte0y+/4kb7\n2bNfTEhPB27ejHm9WtBzvyfCrLkBg2VPT0fea2/oGkERQohoHlhcXIzy8nLk5eXB6/WipKQEJ0+e\nhNVqhd/vR2FhIQ4cOACn0xlTAK+3DaoqkJMzBE1N1+PaiGSrf+cttB4+1HuGogAhdp9it2NY2WOa\nvgH0vPNRX0JliHUMM+v5BtCr+In0FsMbQNd+tFgUuFyDE159XOf8r169imHDhsFqtQIArFYrhg4d\niqtXryYcyIhaPzgSekaY981k3JEo1J2PIgmVIdYxzKxn0bP4yXB0+qszQPdf9ez6DpaTM0THJOFV\nq2rMy/hamjXdnuqW5oQzxDOGmXXbdh1zEIUTS0do3Y9xlX9ubi4aGhrg9/uDp30aGxuRm5sb81hm\nOO0TOJceC1uWU9PtsWU5Y77FXc8M8YxhZoY9nohui/YYNcxpH5fLhfz8fOzduxcAsHfvXuTn58d8\nvt8sHJOnhJ6hKKEnJ+GORKHufBRJqAyxjmFmA/PzI/43ke7S03VdfZ9f+K5btw4HDhyAx+NBVlYW\nMjMzsW/fPtTU1GDVqlVobW2Fw+GA2+3GnXfeGXMAU3zyR+SrfXwmvtpHT4le7ROOGa72IcnFeLVP\nMj75R321T7KYpfwjYXZ9MHvqmTU30H+y63rah4iIzI3lT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMR\nSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEbIkO\ncOjQIWzevBlCCAghsGzZMsyYMUOLbERElCQJlb8QAs888wzeffdd5OXl4T//+Q9+8IMfYPr06bBY\n+EeFLFpPHIensgK+Zi9sTheyS+fCMalI71hx6U/bYib9ab+H2hYAhtu+hD/5WywWXL9+HQBw/fp1\nDB06lMUvkdYTx9GwcwdEZycAwNfsRcPOHQCg+8Edq/60LWbSn/Z7qG2p/+MfAAjA7w9OM8L2JdTS\niqLg1VdfxZNPPompU6fiJz/5Cdxut1bZyAQ8lRXBAz1AdHbCU1mhU6L49adtMZP+tN9DbQv8vmDx\nBxhh+xL65O/z+bBt2za8/vrrKCgowL/+9S+sWLEC+/btw6BBg6Iaw+UaHPx3Ts6QROLoStbs1S3N\nIaf7WppTsk+0XEeqt8Wsx4zWuVO535O9z8NtSyixbp/W2RMq/7Nnz6KxsREFBQUAgIKCAqSnp6Om\npgbjx4+Pagyvtw2qKpCTMwRNTdcTiaMbmbPbspzwNXtDTk/2PtF6v6dyW8x6zCQjd6r2eyr2e
bht\nCffYaPN0zW6xKN0+NMcrodM+w4cPR319PS5cuAAAqKmpgdfrxZgxYxIORuaQXToXit3ebZpitwe/\n5DKT/rQtZtKf9nuobYHVBlit3SYZYfsS+uSfk5OD559/HsuXL4eiKACA9evXIzMzU5NwZHyBL6yM\ndiVDPPrTtphJf9rv4bYl1DS9t08RQgg9A/C0j76YXR9mzW7W3ED/yW6I0z5ERGROLH8iIgmx/ImI\nJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgklfDOXVKt+6sfAzZvdpgV+K+Oz\n/55H6wdHAFUFLBY4Jk9B6+FDyc+U9DUkD7Prw6zZzZob0DG7ogBCABYLoKr8bZ+AWH7bJ1TxBwV2\nMBGRwSl2O4aVPRb1GwB/2ydc8QMsfiIyDSPcyctc5U9E1E9Ee9OXZGH5ExHpwOZ06bp+c5V/enr4\nebdvJkNEZHRGuJOXqco/77U3Qr4B2JwuDF/yBBwPTL31jTpw62qfB6amOCERUQ+BD6a3u8nmdMX0\nZW+ymOpqH6Nidn0we+qZNTfQf7LLebUPERFpguVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8R\nkYRY/kREEmL5ExFJKOGbuXR0dGD9+vX4xz/+gbS0NEyYMAFr167VIhsRESVJwuW/ceNGpKWlYf/+\n/VAUBR6PR4tcRESm13riODyVFfA1ew1zB6+AhMr/xo0b2LNnD44cOQLl9o8XZWdnaxKMiMjMWk8c\nR8POHRCdnQBu/X5/w84dAGCIN4CEzvnX1dUhMzMTW7ZsQWlpKRYtWoTTp09rlY2IyLQ8lRXB4g8w\nwh28AhL65O/3+1FXV4e7774bv/jFL/DJJ5/gRz/6Ed5//30MHhzdr851/XW6nJwhicTRFbPrg9lT\nz6y5gdRmr25pDjnd19IcVw6tsydU/rm5ubDZbJg1axYA4N5770VWVhZqa2txzz33RDUGf9JZX8yu\nD7NmN2tuIPXZbVnOkLdqtGU5Y85huJ90djqdKCwsxLFjxwAAtbW18Hq9GDt2bMLBiIjMLLt0LhS7\nvds0I9zBKyDhq31+85vfYPXq1XC73bDZbPjtb38Lh8OhRTYiItMKfKnbL6/2AYDRo0fj7bff1iIL\nEVG/4phUZJiy74n/hy8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8R\nkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVP\nRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJSLPy37JlC8aNG4fq6mqthiQi\noiSxaTHIp59+io8//hgjR47UYriYtZ44Dk9lBXzNXticLmSXzoVjUlGf8+Id8/IrbrSfPRt8bLe3\nO0UBBgwAOjsBiwVQ1eDyAIJjBubpzcxv1cyeembNDRg3e97/7NBlvQmXf2dnJ1544QW88sorKCsr\n0yJTTFpPHEfDzh0QnZ0AAF+zFw07dwTnh5sX6Q0g0pjXjv29W/H3IsSt4geC5e5r9qL+j38AIAC/\nv9s8IpJb9eOP6fIGkHD5b968GXPmzMGoUaO0yBMzT2VFsKQDRGcnPJUVwX+Hmhep/CON6Wv2xhfU\n74tvOSKiJEio/D/66CNUVVVh5cqVcY/hcg0O/jsnZ0jMy1e3NIec7gszPTAv0rriGZOIKF7RdF88\n/RhJQuV/6tQp1NTUYNq0aQCA+vp6LFmyBBs2bMD9998f1RhebxtUVSAnZwiamq7HnMGW5Qz5adyW\n5QSAsPMirSvSmHF/8iciCqOv7uvajxaL0u1Dc7wSutpn6dKlOHr0KA4ePIiDBw9i+PDh2L59e9TF\nr4Xs0rlQ7PZu0xS7HdmlcyPOi3fMgfn58QW12gCrNb5li
Yg0psnVPnoKnLuPdEVPrFf7RBrTMamo\n19U+3Zjsah8i0pdeV/soQgihy5pvS/S0jxEwuz6YPfXMmhvoP9kNcdqHiIjMieVPRCQhlj8RkYRY\n/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQh\nlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\niOVPRCQhlj8RkYRsiSzc0tKCZ555BpcvX4bdbsfYsWPxwgsvwOl0apWPiIiSIKFP/oqi4PHHH8f+\n/fvx3nvvYfTo0Xj55Ze1ykZEREmSUPlnZmaisLAw+N8TJkzAlStXEg5FRETJpQghhBYDqaqKxYsX\no7i4GGVlZVoMSURESZLQOf+u1q5di4yMDCxcuDCm5bzeNqiqQE7OEDQ1XdcqTkoxuz6YPfXMmhvo\nP9ktFgUu1+CEx9Sk/N1uNy5duoTy8nJYLLyAiIjI6BIu/02bNqGqqgpvvvkm7Ha7FpmIiCjJEir/\n8+fPY9u2bbjjjjvwyCOPAABGjRqFrVu3ahKOiIiSI6Hyv+uuu3Du3DmtshARUYrwBD0RkYRY/kRE\nEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYQ0+1VPuqX1xHF4Kivga/bC5nQh\nu3QuHJOKYl4uY/x4fHbmTMhxuj7WMngwhBAQN270uVyo9VU3ewGLBVBVzfdFslWHma6kpUF0dMDm\ndME2bCjaz50z3PaFy250Zs0NGDC7xQLH5CkYvvBRXVav2e/5x6s//aRz64njaNi5A6KzMzhPsdsx\nrOyxiG8AoZbrKTAOgD4fG2q5ruuPZn1ElBqOB6b2+QaQjJ905mkfDXkqK3oVqujshKeyIublegqM\nE81j+1p/rGMQUfK0fnBEl/XytI+GfM3emKZHOz/Wx/W1XLzjEFES6HRKkp/8NWRzumKaHu38ro+L\n9rGRxo9nDCJKEp1ugMXy11B26VwoPW5oo9jtyC6dG/NyPQXGieaxfa0/1jGIKHkck6fosl6e9tFQ\n4EvVWK/2CbVcX1ftJHK1T8/1mfVqn3DMcLUPEa/26UdX+5gRs+vDrNnNmhvoP9l5tQ8REcWN5U9E\nJCGWPxGRhFj+REQS0v1qH4tFCflvs2F2fTB76pk1N9A/smu1Dbpf7UNERKnH0z5ERBJi+RMRSYjl\nT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJKSvm73W4UFxdj3LhxqK6uDk4/dOgQ\nvvvd7+Lhhx/GnDlzcODAgajm1dbWYt68eSgpKcG8efNw8eLFZMSOmP3w4cP43ve+h9mzZ2PhwoWo\nq6uLKp+Rs7e0tOCJJ55ASUkJZs+ejWXLlqG5uTm43Mcff4w5c+agpKQEixcvhtebvHv/xrPfA7Zs\n2dJrOaNn7+jowJo1azBjxgzMnj0bv/rVr4LzjHzMAMZ4rUY6diM99/HO0zt7bW0tFi1ahJkzZ2LW\nrFl49tln0d7eHhzz4MGDmDlzJh588EGsWLECN2/e7DuISIJTp06JK1euiKlTp4pz584JIYRQVVXc\nd999wf8+e/asmDBhgvD7/RHnCSHEokWLxJ49e4QQQuzZs0csWrQoGbHDZr927ZqYOHGiuHDhQjDD\n4sWLg8tEymfk7C0tLeLEiRPB5V966SXx7LPPCiGE8Pv9Yvr06eLUqVNCCCG2bt0qVq1aZZjsAVVV\nVWLJkiXdljND9rVr14oXX3xRqKoqhBCiqakpOM/Ix4xRXqvhjt1Iz32884yQva6uTnz66afBrMuX\nLxdbtmwRQgjR1tYmioqKRG1trRBCiNWrV4vXXnutzxxJKf+AnuU/ceJEcfr0aSGEEP/85z/FjBkz\n+pzn8XhEQUGB8Pl8Q
gghfD6fKCgoEF6vN5nRu2X/5JNPxHe+853gvJaWFpGXlye8Xm/EfEbP3tNf\n//pX8eijjwaXe+ihh4LzvF6vmDBhQlJzCxFb9o6ODvH9739f1NXV9VrOyNnb2tpEQUGBaGtr6zWG\n0Y8ZI75Whfji2I303Mc7zwjZe9q+fbtYvXq1EEKIv/zlL2Lp0qXBeWfOnOn2/IWTsl/1VBQFr776\nKp588klkZGTgxo0bePPNN/ucd/XqVQwbNgxWqxUAYLVaMXToUFy9ehVOpzMl2b/85S/D4/HgzJkz\nGD9+PN57771gNiFE2HyR5hkhe9cMqqpi165dKC4uDs4fMWJEcL7T6YSqqrh27RoyMzMNkX3z5s2Y\nM2cORo0a1W05o2e3Wq3IzMzEli1bcPLkSQwaNAjLly/HfffdZ/jj3el0Gu612vXYjfTcxzsvmcdM\ntNm7Zmhvb0dFRQV+/vOfA+h9vI8YMQJXr17tc90p+8LX5/Nh27ZteP3113Ho0CG88cYbWLFiBW7c\nuBFxnhEMGTIEv/vd77BhwwaUlpbC6/XC4XAED3Ijizb72rVrkZGRgYULF+qUtLdI2T/66CNUVVVh\n/vz5escMKVJ2v9+Puro63H333aisrMTKlSvx1FNPoa2tTe/YACJnN+Jr1YjHbrRize7z+fCzn/0M\nkyZNwrRp0xJad8o++Z89exaNjY0oKCgAABQUFCA9PR01NTVQFCXsvJEjR6KhoQF+vz/4wmlsbERu\nbm6qogMAioqKUFRUBADweDzYvn07xowZg5s3b4bNJ4QwdPYAt9uNS5cuoby8HBbLrc8Dubm5uHLl\nSvAxzc3NsFgsKfvk3Ff2d955BzU1NcEXQH19PZYsWYINGzYYPnt7eztsNhtmzZoFALj33nuRlZWF\n2tpajBgxwtDHTKTXsR6v1Z7HbqTnPt55RsgOAH6/HytXrsSXvvQlPPfcc8HH5ebm4uTJk8H/vnLl\nSlT7PGWf/IcPH476+npcuHABAFBTUwOv14sxY8ZEnOdyuZCfn4+9e/cCAPbu3Yv8/PyU/Qkc0NTU\nBODWn2mbNm3CI488goyMjIj5jJ4dADZt2oSqqips3boVdrs9uMzXvvY1tLe34/Tp0wCA3bt3Y+bM\nmSnNHSn70qVLcfToURw8eBAHDx7E8OHDsX37dtx///2Gz+50OlFYWIhjx44BuHUlh9frxdixYw1/\nzBjptRrq2I303Mc7zwjZVVXFqlWrYLVa8eKLL0JRvrihy7e+9S38+9//Dl5ZtXv3bnz729/uM0NS\nbuaybt06HDhwAB6PB1lZWcjMzMS+ffvw5z//Gb///e+DwX/6059i+vTpABBxXk1NDVatWoXW1lY4\nHA643W7ceeedWseOmP2Xv/wlPvzwQ3z++ef45je/idWrVyMtLa3PfEbOfv78ecyaNQt33HEHBg4c\nCAAYNWoUtm7dCgD48MMPsWbNGnR0dGDkyJHYuHEjsrOzDZG9p+LiYpSXlyMvL88U2evq6rB69Wpc\nu3YNNpsNK1aswJQpUwAY+5gBjPFajXTsRnru452nd/bDhw/jhz/8IfLy8oJ/nX/jG9/AmjVrAAB/\n+9vfsHHjRqiqivz8fLz00kvBD3jh8E5eREQS4v/hS0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/\nEZGEWP5ERBJi+RMRSej/AZusTW/jKGeJAAAAAElFTkSuQmCC\n",
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGxxJREFUeJzt3X1wFPXBB/Dv3p0XEuCa3CVAeLVO\nDY1TkTYOYVILEpDQCtiGmWJ5iQ4obS0WWhlFaosVBE+Uygga7EOLqAP/JEMr9CnY4cUChUJ9oXEo\noSFAZiAvdwlPCJLEu/09f8Cdebm73Mve7W5+389fuHv72+/u7X3vsrfeKkIIASIikopF7wBERJR6\nLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKS\nEMufiEhCNr0DtLRch6oKuFyD4PW26R0nLsyuD2ZPPbPmBvpPdotFQVbWwITH1L38VVVAVUXw32bF\n7Ppg9tQza26A2bviaR8iIgmx/ImIJMTyJyKSUFTl73a7UVxcjLFjx6K6uhoA0NLSgscffxwlJSWY\nNWsWli5diubm5qSGJSIibUT1he/UqVNRVlaG+fPnB6cpioLHHnsMhYWFAG6+QbzyyitYt25dcpJS\n0rUePwZPZQV8zV7AYgFUFTanC9mlc+CYWNT7MQEWCxyTJmPYgkcijtlzrHizBcbJmVUS97YmItpt\nCrmvbqkOMW5grKtH/472M2dCzutrPTanCxnjxuHz06e75QMQ9rntOi8w7fP/nkPrh4cBVe31/Na/\n+/aX83pSFKDnbcFvrU8ZOBCKokBtawtOi5nFggFjx8LX0HhzW9LSgM7ObusMtx/rx92Nzi98vfat\nJqxWwO/vljPca8IolFhu4F5cXIzy8nLk5eX1mrdv3z7s3LkT27dvjymA19sGVRXIyRmMpqZrMS1r\nFP0he+vxY2jYsR2is7PXYxS7HUPLHgWAsI8BAMf9U7od7KHGDIwVyxtAuHG+tvSnUO76ZtTjaCHa\nbYq0PyMKVZ6JrsdqBaAAfl9088JkcNw/BenpdjT8774oN0ZHEfZjKvV8TcSra8dYLApcrkEJj6nJ\nOX9VVbFz504UFxdrMRzpwFNZEbZARGcnPJUVER8D4OanwT7GDIyVaDbR2YlL77wX0zhaiHab+tpX\nYUUorLjX4/eHLv5w88JkaP3wMBr2fRB5XUZhgOIHer8mjEST6/zXrFmDjIwMLFiwIOZlu76D5eQM\n1iKOLsyevbol8vc1vj7mAwBUtdt+CDemr6U5pv0VbpwOjzfl+z3abeprf8YrVesJKZ7TNLLr8ZpI\nhNbHesLl73a7cfHiRZSXl8Niif0PCZ720Vcguy3LGfLcdIAtywkAER8Di6Xbfgg3pi3LGdP+CjdO\nWrYr5fs92m3qa38msv5UrCekwOubbwLR6/GaiJfhTvts3LgRVVVV2LJlC+x2e8JhSD/ZpXOghHkO\nFbsd2aVzIj4GAByTJvc5ZmCsRLMpdjtGL5wfZonkiXab+tpXYSlK+FnxrsdqBaxhPueFmhcmg2PS\nZAwteSDyuowiwn5MpZ6vCSOxPv/888/39aC1a9fiueeeQ2NjI/bt24fKykpMmDABTz31FG677Tbs\n3r0bu3btwpEjR/Dggw/GFODGjU4IAQwcmIbPP4/jHKkB9IfsaSNH4TaXC+0XLkC9cePmpzwhYHO6\nMOTheXBMLOr9mACLBY7J9/f6Yqvn47uOFYtw44yaPjXl+z3abQq7ryKwOV0YMn8BfDc+h8/j6T0v\nivXYnC4MKpwIf+u1L/PNm49B3/xm6Oe2x7xABovDgY5Ll26eO+/y/I6aXIT/q2/6cl5PoUr31vqU\ngQNhSUu7+R3FrWkxs1gw4Ot
fB1Rxc1vS0nr9JRJuPzrG3Q3F6eq1bzVhtXbfnjCviXh17RhFUZCR\nkfiH7Ziu9kkGnvbRF7Prw6zZzZob6D/ZDXHah4iIzInlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIU1+0pmIKFZa3eWN4sPyJ6KU63kHMl+zFw07tgMA3wBShKd9\niCjltLrLG8WP5U9EKRfuBjQpuzENsfyJKPVsTldM00l7LH8iSjmt7vJG8eMXvkSUcoEvdXm1j35Y\n/kSkC8fEIpa9jnjah4hIQix/IiIJsfyJiCTE8icikhDLn4hIQix/IiIJsfyJiCTE8icikhDLn4hI\nQn2Wv9vtRnFxMcaOHYvq6urg9NraWsydOxclJSWYO3cuLly4kMycRESkoT5/3mHq1KkoKyvD/Pnz\nu01fvXo15s2bh4ceegh/+tOf8Jvf/AY7duxIWlC91b/7Nlo/PAyoKmCxwDFpMjK+dic8lRWobvYC\nFgugqkn9jZKudz4KUhTAbgc6OqLK0HOM6l6PSJ1Azvq3/wh88UXMy4fLPiA/H6OfeqbX9EuvutF+\n5syXE9LTgRs3Yl6vFvTc74kwa27AYNnT05H3+pu6RlCEECKaBxYXF6O8vBx5eXnwer0oKSnBiRMn\nYLVa4ff7UVhYiP3798PpdMYUwOttg6oK5OQMRlPTtbg2Itnq330brYcO9p6hKECI3afY7Rha9qim\nbwA973zUl1AZYh3DzHq+AfQqfiK9xfAG0LUfLRYFLteghFcf1zn/K1euYOjQobBarQAAq9WKIUOG\n4MqVKwkHMqLWDw+HnhHmfTMZdyQKdeejSEJliHUMM+tZ9Cx+Mhyd/uoM0P1XPbu+g+XkDNYxSXjV\nqhrzMr6WZk23p7qlOeEM8YxhZt22XcccROHE0hFa92Nc5Z+bm4uGhgb4/f7gaZ/Gxkbk5ubGPJYZ\nTvsEzqXHwpbl1HR7bFnOmG9x1zNDPGOYmWGPJ6Jboj1GDXPax+VyIT8/H3v27AEA7NmzB/n5+TGf\n7zcLx6TJoWcoSujJSbgjUag7H0USKkOsY5jZgPz8iP9NpLv0dF1X3+cXvmvXrsX+/fvh8XiQlZWF\nzMxM7N27FzU1NVi5ciVaW1vhcDjgdrtxxx13xBzAFJ/8EflqH5+Jr/bRU6JX+4Rjhqt9SHIxXu2T\njE/+UV/tkyxmKf9ImF0fzJ56Zs0N9J/sup72ISIic2P5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\niOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kRE\nEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhW6ID\nHDx4EJs2bYIQAkIILF26FNOnT9ciGxERJUlC5S+EwNNPP4333nsPeXl5+M9//oMf/ehHmDZtGiwW\n/lEhi9bjx+CprICv2Qub04Xs0jlwTCzSO1Zc+tO2mEl/2u+htgWA4bYv4U/+FosF165dAwBcu3YN\nQ4YMYfFLpPX4MTTs2A7R2QkA8DV70bBjOwDofnDHqj9ti5n0p/0ealvq//gHAALw+4PTjLB9CbW0\noih47bXX8MQTT2DKlCn42c9+BrfbrVU2MgFPZUXwQA8QnZ3wVFbolCh+/WlbzKQ/7fdQ2wK/L1j8\nAUbYvoQ++ft8PmzduhVvvPEGCgoK8K9//QvLly/H3r17MXDgwKjGcLkGBf+dkzM4kTi6kjV7dUtz\nyOm+luaU7BMt15HqbTHrMaN17lTu92Tv83DbEkqs26d19oTK/8yZM2hsbERBQQEAoKCgAOnp6aip\nqcG4ceOiGsPrbYOqCuTkDEZT07VE4uhG5uy2LCd8zd6Q05O9T7Te76ncFrMeM8nInar9nop9H
m5b\nwj022jxds1ssSrcPzfFK6LTPsGHDUF9fj/PnzwMAampq4PV6MXr06ISDkTlkl86BYrd3m6bY7cEv\nucykP22LmfSn/R5qW2C1AVZrt0lG2L6EPvnn5OTg+eefx7Jly6AoCgBg3bp1yMzM1CQcGV/gCyuj\nXckQj/60LWbSn/Z7uG0JNU3v7VOEEELPADztoy9m14dZs5s1N9B/shvitA8REZkTy5+ISEIsfyIi\nCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIJ38wl1aqf/Clw40a3aYHfyvj8\nv+fQ+uFhQFUBiwWOSZPReuhg8jMlfQ3Jw+z6MGt2s+YGdMyuKIAQgMUCqCp/2ycglt/2CVX8QYEd\nTERkcIrdjqFlj0b9BsDf9glX/ACLn4hMwwh38jJX+RMR9RPR3vQlWVj+REQ6sDlduq7fXOWfnh5+\n3q2byRARGZ0R7uRlqvLPe/3NkG8ANqcLwxY/Dsf9U25+ow7cvNrn/ikpTkhE1EPgg+mtbrI5XTF9\n2Zssprrax6iYXR/MnnpmzQ30n+xyXu1DRESaYPkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQSSvhmLh0dHVi3bh3+8Y9/IC0tDePHj8eaNWu0yEZEREmScPlv2LABaWlp2Ldv\nHxRFgcfj0SIXEZHptR4/Bk9lBXzNXsPcwSsgofK/fv06du/ejcOHD0O59eNF2dnZmgQjIjKz1uPH\n0LBjO0RnJ4Cbv9/fsGM7ABjiDSChc/51dXXIzMzE5s2bUVpaioULF+LUqVNaZSMiMi1PZUWw+AOM\ncAevgIQ++fv9ftTV1eGuu+7CM888g08//RQ/+clP8MEHH2DQoOh+da7rr9Pl5AxOJI6umF0fzJ56\nZs0NpDZ7dUtzyOm+lua4cmidPaHyz83Nhc1mw8yZMwEA99xzD7KyslBbW4u77747qjH4k876YnZ9\nmDW7WXMDqc9uy3KGvFWjLcsZcw7D/aSz0+lEYWEhjh49CgCora2F1+vFmDFjEg5GRGRm2aVzoNjt\n3aYZ4Q5eAQlf7fPb3/4Wq1atgtvths1mw8svvwyHw6FFNiIi0wp8qdsvr/YBgFGjRuGdd97RIgsR\nUb/imFhkmLLvif+HLxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUlIs/LfvHkzxo4di+rqaq2GJCKi\nJLFpMchnn32GTz75BCNGjNBiuJi1Hj8GT2UFfM1e2JwuZJfOgWNiUZ/z4h3z0qtutJ85E3xst7c7\nRQFuuw3o7AQsFkBVg8sDCI4ZmKc3M79VM3vqmTU3YNzsef+zXZf1Jlz+nZ2deOGFF/Dqq6+irKxM\ni0wxaT1+DA07tkN0dgIAfM1eNOzYHpwfbl6kN4BIY149+vduxd+LEDeLHwiWu6/Zi/o//gGAAPz+\nbvOISG7Vjz2qyxtAwuW/adMmzJ49GyNHjtQiT8w8lRXBkg4QnZ3wVFYE/x1qXqTyjzSmr9kbX1C/\nL77liIiSIKHy//jjj1FVVYUVK1bEPYbLNSj475ycwTEvX93SHHK6L8z0wLxI64pnTCKieEXTffH0\nYyQJlf/JkydRU1ODqVOnAgDq6+uxePFirF+/Hvfdd19UY3i9bVBVgZycwWhquhZzBluWM+SncVuW\nEwDCzou0rkhjxv3Jn4gojL66r2s/WixKtw/N8Uroap8lS5bgyJEjOHDgAA4cOIBhw4Zh27ZtURe/\nFrJL50Cx27tNU+x2ZJfOiTgv3jEH5OfHF9RqA6zW+JYlI
tKYJlf76Clw7j7SFT2xXu0TaUzHxKJe\nV/t0Y7KrfYhIX3pd7aMIIYQua74l0dM+RsDs+mD21DNrbqD/ZDfEaR8iIjInlj8RkYRY/kREEmL5\nExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY\n/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQh\nlj8RkYRY/kREErIlsnBLSwuefvppXLp0CXa7HWPGjMELL7wAp9OpVT4iIkqChD75K4qCxx57DPv2\n7cP777+PUaNG4ZVXXtEqGxERJUlC5Z+ZmYnCwsLgf48fPx6XL19OOBQRESWXIoQQWgykqioWLVqE\n4uJilJWVaTEkERElSULn/Ltas2YNMjIysGDBgpiW83rboKoCOTmD0dR0Tas4KcXs+mD21DNrbqD/\nZLdYFLhcgxIeU5Pyd7vduHjxIsrLy2Gx8AIiIiKjS7j8N27ciKqqKrz11luw2+1aZCIioiRLqPzP\nnTuHrVu34vbbb8fDDz8MABg5ciS2bNmiSTgiIkqOhMr/zjvvxNmzZ7XKQkREKcIT9EREEmL5ExFJ\niOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEtLsVz3pptbjx+CprICv2Qub04Xs\n0jlwTCyKebmMcePw+enTIcfp+ljLoEEQQkBcv97ncqHWV93sBSwWQFU13xfJVh1mupKWBtHRAZvT\nBdvQIWg/e9Zw2xcuu9GZNTdgwOwWCxyTJmPYgkd0Wb1mv+cfr/70k86tx4+hYcd2iM7O4DzFbsfQ\nskcjvgGEWq6nwDgA+nxsqOW6rj+a9RFRajjun9LnG0AyftKZp3005Kms6FWoorMTnsqKmJfrKTBO\nNI/ta/2xjkFEydP64WFd1svTPhryNXtjmh7t/Fgf19dy8Y5DREmg0ylJfvLXkM3piml6tPO7Pi7a\nx0YaP54xiChJdLoBFstfQ9mlc6D0uKGNYrcju3ROzMv1FBgnmsf2tf5YxyCi5HFMmqzLennaR0OB\nL1Vjvdon1HJ9XbWTyNU+Pddn1qt9wjHD1T5EvNqnH13tY0bMrg+zZjdrbqD/ZOfVPkREFDeWPxGR\nhFj+REQSYvkTEUlI96t9LBYl5L/Nhtn1weypZ9bcQP/IrtU26H61DxERpR5P+xARSYjlT0QkIZY/\nEZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSSgp5e92u1FcXIyxY8eiuro6OP3gwYP4\n/ve/j4ceegizZ8/G/v37o5pXW1uLuXPnoqSkBHPnzsWFCxeSETti9kOHDuEHP/gBZs2ahQULFqCu\nri6qfEbO3tLSgscffxwlJSWYNWsWli5diubm5uByn3zyCWbPno2SkhIsWrQIXm/y7v0bz34P2Lx5\nc6/ljJ69o6MDq1evxvTp0zFr1iz8+te/Ds4z8jEDGOO1GunYjfTcxztP7+y1tbVYuHAhZsyYgZkz\nZ+LZZ59Fe3t7cMwDBw5gxowZeOCBB7B8+XLcuHGj7yAiCU6ePCkuX74spkyZIs6ePSuEEEJVVXHv\nvfcG//vMmTNi/Pjxwu/3R5wnhBALFy4Uu3fvFkIIsXv3brFw4cJkxA6b/erVq2LChAni/PnzwQyL\nFi0KLhMpn5Gzt7S0iOPHjweXf+mll8Szzz4rhBDC7/eLadOmiZMnTwohhNiyZYtYuXKlYbIHVFVV\nicWLF3dbzgzZ16xZI1588UWhqqoQQoimpqbgPCMfM0Z5rYY7diM99/HOM0L2uro68dlnnwWzLlu2\nTGzevFkIIURbW5soKioStbW1QgghVq1aJV5//fU+cySl/AN6lv+ECRPEqVOnhBBC/POf/xTTp0/v\nc57H4xEFBQXC5/MJI
YTw+XyioKBAeL3eZEbvlv3TTz8V3/ve94LzWlpaRF5envB6vRHzGT17T3/9\n61/FI488ElzuwQcfDM7zer1i/PjxSc0tRGzZOzo6xA9/+ENRV1fXazkjZ29raxMFBQWira2t1xhG\nP2aM+FoV4stjN9JzH+88I2Tvadu2bWLVqlVCCCH+8pe/iCVLlgTnnT59utvzF07KftVTURS89tpr\neOKJJ5CRkYHr16/jrbfe6nPelStXMHToUFitVgCA1WrFkCFDcOXKFTidzpRk/+pXvwqPx4PTp09j\n3LhxeP/994PZhBBh80WaZ4TsXTOoqoqdO3eiuLg4OH/48OHB+U6nE6qq4urVq8jMzDRE9k2bNmH2\n7NkYOXJkt+WMnt1qtSIzMxObN2/GiRMnMHDgQCxbtgz33nuv4Y93p9NpuNdq12M30nMf77xkHjPR\nZu+aob29HRUVFfjlL38JoPfxPnz4cFy5cqXPdafsC1+fz4etW7fijTfewMGDB/Hmm29i+fLluH79\nesR5RjB48GD87ne/w/r161FaWgqv1wuHwxE8yI0s2uxr1qxBRkYGFixYoFPS3iJl//jjj1FVVYV5\n8+bpHTOkSNn9fj/q6upw1113obKyEitWrMCTTz6JtrY2vWMDiJzdiK9VIx670Yo1u8/nwy9+8QtM\nnDgRU6dOTWjdKfvkf+bMGTQ2NqKgoAAAUFBQgPT0dNTU1EBRlLDzRowYgYaGBvj9/uALp7GxEbm5\nuamKDgAoKipCUVERAMDj8WDbtm0YPXo0bty4ETafEMLQ2QPcbjcuXryI8vJyWCw3Pw/k5ubi8uXL\nwcc0NzfDYrGk7JNzX9nfffdd1NTUBF8A9fX1WLx4MdavX2/47O3t7bDZbJg5cyYA4J577kFWVhZq\na2sxfPhwQx8zkV7HerxWex67kZ77eOcZITsA+P1+rFixAl/5ylfw3HPPBR+Xm5uLEydOBP/78uXL\nUe3zlH3yHzZsGOrr63H+/HkAQE1NDbxeL0aPHh1xnsvlQn5+Pvbs2QMA2LNnD/Lz81P2J3BAU1MT\ngJt/pm3cuBEPP/wwMjIyIuYzenYA2LhxI6qqqrBlyxbY7fbgMt/4xjfQ3t6OU6dOAQB27dqFGTNm\npDR3pOxLlizBkSNHcODAARw4cADDhg3Dtm3bcN999xk+u9PpRGFhIY4ePQrg5pUcXq8XY8aMMfwx\nY6TXaqhjN9JzH+88I2RXVRUrV66E1WrFiy++CEX58oYu3/nOd/Dvf/87eGXVrl278N3vfrfPDEm5\nmcvatWuxf/9+eDweZGVlITMzE3v37sWf//xn/P73vw8G//nPf45p06YBQMR5NTU1WLlyJVpbW+Fw\nOOB2u3HHHXdoHTti9l/96lf46KOP8MUXX+Db3/42Vq1ahbS0tD7zGTn7uXPnMHPmTNx+++0YMGAA\nAGDkyJHYsmULAOCjjz7C6tWr0dHRgREjRmDDhg3Izs42RPaeiouLUV5ejry8PFNkr6urw6pVq3D1\n6lXYbDYsX74ckydPBmDsYwYwxms10rEb6bmPd57e2Q8dOoQf//jHyMvLC/51/q1vfQurV68GAPzt\nb3/Dhg0boKoq8vPz8dJLLwU/4IXDO3kREUmI/4cvEZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5E\nRBJi+RMRSYjlT0Qkof8Hm7xNb6groUQAAAAASUVORK5CYII=\n",
             "text/plain": [
               "<Figure size 432x288 with 1 Axes>"
             ]
@@ -2320,10 +2319,11 @@
         "colab": {}
       },
       "source": [
+        "from cuml.preprocessing.model_selection import train_test_split\n",
         "#location seems to be related to building quality, (knnclassifier)\n",
         "\n",
-        "def fillna_knn( df, base, target):\n",
-        "    data_colnames = [ target ] + base\n",
+        "def fillna_knn(df, base, target):\n",
+        "    data_colnames = [target] + base\n",
         "    #print(\"data_colnames\",data_colnames)\n",
         "    missing_values_boolflag = df[target].isnull() #true for missing rows, false for columns with values\n",
         "    #print(\"miss\",missing_values_boolflag.head())\n",
@@ -2331,11 +2331,12 @@
         "    #print(\"not miss\",not_missing_boolflag.head())\n",
         "    number_of_missing_val = missing_values_boolflag.sum()\n",
         "    print(\"# of miss\",number_of_missing_val)\n",
-        "    not_missing_rows = df.loc[ not_missing_boolflag, data_colnames ]\n",
+        "    not_missing_rows = df.loc[not_missing_boolflag, data_colnames]\n",
         "    #print(not_missing_rows.head())\n",
         "    Y = not_missing_rows[target]\n",
         "    X = not_missing_rows[base]\n",
-        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192,stratify=Y)\n",
+        "    #X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192,stratify=Y)\n",
+        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8)\n",
         "    metrics       = ['euclidean'] \n",
         "    weights       = ['distance'] \n",
         "    numNeighbors  = [5,10,15,20,25]\n",
@@ -2360,43 +2361,13 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "AT8Osn51lD9v",
-        "colab_type": "code",
-        "outputId": "9a3af301-2c19-4bfd-faca-3dba219a270c",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 50
-        }
-      },
-      "source": [
-        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "temp['buildingqualitytypeid']=temp['buildingqualitytypeid'].fillna(-1)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "32911\n",
-            "(90275, 45)\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "f8rNxkrxACGe",
+        "id": "6eES-hq--NKZ",
         "colab_type": "code",
         "colab": {}
       },
       "source": [
-        "\"\"\"RESET WIRE\"\"\"\n",
-        "# hold_df = df_train.copy()\n",
-        "df_train = hold_df.copy()"
+        "# test = df_train.copy()\n",
+        "df_train = test.copy()"
       ],
       "execution_count": 0,
       "outputs": []
@@ -2404,35 +2375,56 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "OkyuebKaACxa",
+        "id": "AT8Osn51lD9v",
         "colab_type": "code",
-        "outputId": "d0dc876b-b02f-4179-91d0-d9a9b42e0e27",
+        "outputId": "83435ba5-0887-47fb-f8fb-ceeb9dd92fda",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 185
+          "height": 573
         }
       },
       "source": [
-        "\n",
-        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "temp['buildingqualitytypeid']=temp['buildingqualitytypeid'].fillna(-1)\n",
-        "print(temp.to_pandas().head())\n"
+        "print('CURRENT DF SITUATION\\n')\n",
+        "print(f'SHAPE = {df_train.shape}')\n",
+        "print(f'NULL COUNT = {df_train.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{df_train.buildingqualitytypeid.value_counts()}\\n')\n",
+        "print(f'BUILDINGTYPEID HEAD\\n{df_train.buildingqualitytypeid.head()}\\n')\n",
+        "print(f'DF TRAIN HEAD\\n{df_train.head()}')"
       ],
-      "execution_count": 0,
+      "execution_count": 49,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "32911\n",
-            "(90275, 45)\n",
-            "   parcelid  logerror  ac_id  ...  transaction_month  census_tractnumber  block_number\n",
-            "0  11827818    0.0402    NaN  ...                  3             5315.03          1013\n",
-            "1  12123024    0.0296    NaN  ...                  3             4625.00          1017\n",
-            "2  13867327    0.0344    NaN  ...                  3             0114.01          2017\n",
-            "3  12681894    0.0060    NaN  ...                  3             6513.02          1004\n",
-            "4  12848541    0.0695    1.0  ...                  3             4087.03          1018\n",
+            "CURRENT DF SITUATION\n",
+            "\n",
+            "SHAPE = (90275, 45)\n",
+            "NULL COUNT = 32911\n",
+            "VALUE COUNTS\n",
+            "7.0     29310\n",
+            "4.0     23839\n",
+            "1.0      2627\n",
+            "10.0     1461\n",
+            "12.0      119\n",
+            "8.0         5\n",
+            "6.0         2\n",
+            "11.0        1\n",
+            "Name: buildingqualitytypeid, dtype: int32\n",
+            "\n",
+            "BUILDINGTYPEID HEAD\n",
+            "0     7.0\n",
+            "1    null\n",
+            "2    null\n",
+            "3     7.0\n",
+            "4     4.0\n",
+            "Name: buildingqualitytypeid, dtype: float64\n",
+            "\n",
+            "DF TRAIN HEAD\n",
+            "   parcelid  logerror ac_id  ...  transaction_month  census_tractnumber  block_number\n",
+            "0  11827818    0.0402  null  ...                  3             5315.03          1013\n",
+            "1  12123024    0.0296  null  ...                  3             4625.00          1017\n",
+            "2  13867327    0.0344  null  ...                  3             0114.01          2017\n",
+            "3  12681894    0.0060  null  ...                  3             6513.02          1004\n",
+            "4  12848541    0.0695   1.0  ...                  3             4087.03          1018\n",
             "\n",
             "[5 rows x 45 columns]\n"
           ],
@@ -2445,50 +2437,48 @@
       "metadata": {
         "id": "79bB7JKdAEtX",
         "colab_type": "code",
-        "outputId": "29f38a6a-dac2-4917-8f1b-8a4b198afe67",
+        "outputId": "b1b1e940-e89a-40e8-c5af-5919c896ca19",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 118
+          "height": 225
         }
       },
       "source": [
-        "print(temp.to_pandas().buildingqualitytypeid.head())"
+        "temp=df_train.copy()\n",
+        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].fillna(-1)\n",
+        "print(f'NULL COUNT = {temp.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{temp.buildingqualitytypeid.value_counts()}')"
       ],
-      "execution_count": 0,
+      "execution_count": 50,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "0    7.0\n",
-            "1   -1.0\n",
-            "2   -1.0\n",
-            "3    7.0\n",
-            "4    4.0\n",
-            "Name: buildingqualitytypeid, dtype: float64\n"
+            "NULL COUNT = 0\n",
+            "VALUE COUNTS\n",
+            "-1.0     32911\n",
+            " 7.0     29310\n",
+            " 4.0     23839\n",
+            " 1.0      2627\n",
+            " 10.0     1461\n",
+            " 12.0      119\n",
+            " 8.0         5\n",
+            " 6.0         2\n",
+            " 11.0        1\n",
+            "Name: buildingqualitytypeid, dtype: int32\n"
           ],
           "name": "stdout"
         }
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "DVgF1c_p_bN1",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: break-----"
-      ]
-    },
     {
       "cell_type": "code",
       "metadata": {
         "id": "mAB9bsrPAGzQ",
         "colab_type": "code",
-        "outputId": "2f9eaa73-a7b2-4634-e24d-9aec777b2536",
+        "outputId": "ff5376d3-6854-4d05-a7c1-7ffe0a6136a4",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 387
+          "height": 347
         }
       },
       "source": [
@@ -2499,157 +2489,139 @@
         "# conditions = (temp.buildingqualitytypeid.value_counts > 3)\n",
         "# print(temp.loc[temp.buildingqualitytypeid.astype(int) > 3].head())\n",
         "# temp.loc[temp.census_tractnumber.value_counts() > 3]\n",
-        "print(temp.loc[temp.census_tractnumber.value_counts().values > 3].to_pandas().head())\n",
+        "# print(temp.loc[temp.census_tractnumber.value_counts().values > 3].to_pandas().head())\n",
         "\n",
-        "# temp = temp.loc[]\n",
-        "print(temp.to_pandas().buildingqualitytypeid.head())\n"
+        "\"\"\"still working on how to best do this in RAPIDS\n",
+        "\"\"\"\n",
+        "print(f'{temp.buildingqualitytypeid.value_counts()}\\n')\n",
+        "temp = temp.to_pandas()\n",
+        "temp = temp.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+        "temp = cudf.from_pandas(temp)\n",
+        "print(temp.buildingqualitytypeid.value_counts())"
       ],
-      "execution_count": 0,
+      "execution_count": 51,
       "outputs": [
         {
-          "output_type": "error",
-          "ename": "RuntimeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-63-19513b5ffbd3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcensus_tractnumber\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# temp = temp.loc[]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    108\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    109\u001b[0m                 \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple_arg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    112\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple_arg\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    210\u001b[0m                 \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    211\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcolumns_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m                     \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumns_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    213\u001b[0m         \u001b[0;31m# Step 3: Gather index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    214\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# we have a single row\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m     54\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     55\u001b[0m         \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_loc_to_iloc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m     36\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     37\u001b[0m             \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     39\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     40\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    390\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    391\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 392\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    393\u001b[0m         \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    394\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/column.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    530\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    531\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_bool_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_boolean_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    533\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    534\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/columnops.py\u001b[0m in \u001b[0;36mapply_boolean_mask\u001b[0;34m(self, mask)\u001b[0m\n\u001b[1;32m    116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m         \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"bool\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 118\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mapply_apply_boolean_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    119\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    120\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mcolumn_empty_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnewsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32mcudf/bindings/stream_compaction.pyx\u001b[0m in \u001b[0;36mcudf.bindings.stream_compaction.apply_apply_boolean_mask\u001b[0;34m()\u001b[0m\n",
-            "\u001b[0;31mRuntimeError\u001b[0m: cuDF failure at: /conda/conda-bld/libcudf_1566412619056/work/cpp/src/stream_compaction/apply_boolean_mask.cu:64: Column size mismatch"
-          ]
+          "output_type": "stream",
+          "text": [
+            "-1.0     32911\n",
+            " 7.0     29310\n",
+            " 4.0     23839\n",
+            " 1.0      2627\n",
+            " 10.0     1461\n",
+            " 12.0      119\n",
+            " 8.0         5\n",
+            " 6.0         2\n",
+            " 11.0        1\n",
+            "Name: buildingqualitytypeid, dtype: int32\n",
+            "\n",
+            "-1.0     32911\n",
+            " 7.0     29310\n",
+            " 4.0     23839\n",
+            " 1.0      2627\n",
+            " 10.0     1461\n",
+            " 12.0      119\n",
+            " 8.0         5\n",
+            "Name: buildingqualitytypeid, dtype: int32\n"
+          ],
+          "name": "stdout"
         }
       ]
     },
     {
       "cell_type": "code",
       "metadata": {
-        "id": "QCyed1SjAJFP",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "print(temp.to_pandas().head())\n"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "1JgQ1Tq2NRsz",
+        "id": "uCyRxp-7qEXf",
         "colab_type": "code",
-        "outputId": "c113cc08-3a69-4aa1-d05e-7b4d2a5df9fa",
+        "outputId": "629f0745-3a63-4bd8-aa10-835a94450cb6",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 162
+          "height": 52
         }
       },
       "source": [
-        "df_train.loc[df_train.buildingqualitytypeid>3]"
+        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].replace(-1,np.nan)\n",
+        "print(temp.buildingqualitytypeid.isnull().sum())\n",
+        "print(temp.shape)"
       ],
-      "execution_count": 0,
+      "execution_count": 52,
       "outputs": [
         {
-          "output_type": "error",
-          "ename": "NameError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-75-25a467e8484f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;31mNameError\u001b[0m: name 'buildingqualitytypeid' is not defined"
-          ]
+          "output_type": "stream",
+          "text": [
+            "32911\n",
+            "(90272, 45)\n"
+          ],
+          "name": "stdout"
         }
       ]
     },
     {
-      "cell_type": "code",
+      "cell_type": "markdown",
       "metadata": {
-        "id": "XFkPwjUmHu4Y",
-        "colab_type": "code",
-        "outputId": "00b5fdb3-25fc-460a-bbd3-aaa421a93555",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 279
-        }
+        "id": "DVgF1c_p_bN1",
+        "colab_type": "text"
       },
       "source": [
-        "temp=temp.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "error",
-          "ename": "AttributeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-72-7111ac5c7eeb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"buildingqualitytypeid\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/groupby/groupby.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    133\u001b[0m             )\n\u001b[1;32m    134\u001b[0m         raise AttributeError(\n\u001b[0;32m--> 135\u001b[0;31m             \u001b[0;34m\"'DataFrameGroupBy' object has no attribute \"\u001b[0m \u001b[0;34m\"'{}'\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    136\u001b[0m         )\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mAttributeError\u001b[0m: 'DataFrameGroupBy' object has no attribute 'filter'"
-          ]
-        }
+        "# -----current: break-----\n",
+        "- below is last cell run"
       ]
     },
     {
       "cell_type": "code",
       "metadata": {
-        "id": "uCyRxp-7qEXf",
+        "id": "Q3ZBSOHm-79A",
         "colab_type": "code",
-        "outputId": "969848f0-fbc6-4388-dca2-08f8bde03990",
+        "outputId": "3da3e840-8d13-426a-e0aa-8ae20679326b",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 380
+          "height": 394
         }
       },
       "source": [
-        "\n",
-        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].replace(-1,np.nan)\n",
-        "print(temp.buildingqualitytypeid.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "\n",
-        "missing_values=fillna_knn(temp,\n",
-        "                  base = [ 'latitude', 'longitude' ] ,\n",
-        "                  target = 'buildingqualitytypeid')\n",
+        "missing_values = fillna_knn(temp, \n",
+        "                            base = ['latitude', 'longitude'], \n",
+        "                            target = 'buildingqualitytypeid')\n",
         "\n",
         "print(\"predicted output shape\",missing_values.shape)\n",
         "missing_values_boolflag = df_train['buildingqualitytypeid'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'buildingqualitytypeid' ]  = missing_values\n",
+        "df_train.loc[missing_values_boolflag, 'buildingqualitytypeid'] = missing_values\n",
         "\n",
         "print(df_train.buildingqualitytypeid.isnull().sum())"
       ],
-      "execution_count": 0,
+      "execution_count": 53,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "32911\n",
-            "(90275, 45)\n"
+            "# of miss 32911\n"
           ],
           "name": "stdout"
         },
         {
           "output_type": "error",
-          "ename": "AttributeError",
+          "ename": "NameError",
           "evalue": "ignored",
           "traceback": [
             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-44-2202aaa9de30>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mtemp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"buildingqualitytypeid\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'buildingqualitytypeid'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnan\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m  
    7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/groupby/groupby.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    133\u001b[0m             )\n\u001b[1;32m    134\u001b[0m         raise AttributeError(\n\u001b[0;32m--> 135\u001b[0;31m             \u001b[0;34m\"'DataFrameGroupBy' object has no attribute \"\u001b[0m \u001b[0;34m\"'{}'\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    136\u001b[0m         )\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mAttributeError\u001b[0m: 'DataFrameGroupBy' object has no attribute 'filter'"
+            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-53-d133e1117381>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m missing_values = fillna_knn(temp, \n\u001b[1;32m      2\u001b[0m                             \u001b[0mbase\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m                             target = 'buildingqualitytypeid')\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m<ipython-input-43-54081f35e0d8>\u001b[0m in \u001b[0;36mfillna_knn\u001b[0;34m(df, base, target)\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0mnumNeighbors\u001b[0m  \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m15\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m     \u001b[0mparam_grid\u001b[0m    \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn_neighbors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnumNeighbors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m     \u001b[0mcv\u001b[0m            \u001b[0;34m=\u001b[0m \u001b[0mStratifiedKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3192\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     24\u001b[0m     \u001b[0mgrid\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mneighbors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mKNeighborsClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'f1_weighted'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrefit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreturn_train_score\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mpre_dispatch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'n_jobs'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m     \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mY_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mNameError\u001b[0m: name 'StratifiedKFold' is not defined"
           ]
         }
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bgXh5OATEacY",
+        "colab_type": "text"
+      },
+      "source": [
+        "# BELOW NOT RUN"
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
@@ -3727,6 +3699,47 @@
       ],
       "execution_count": 0,
       "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WzATgLxmam5w",
+        "colab_type": "text"
+      },
+      "source": [
+        "In this competition, Zillow is asking you to predict the log-error between their Zestimate and the actual sale price, given all the features of a home. The log error is defined as\n",
+        "\n",
+        "$$\\text{logerror} = \\log(\\text{Zestimate}) - \\log(\\text{SalePrice})$$\n",
+        "and it is recorded in the transactions file train.csv. In this competition, you are going to predict the logerror for the months in Fall 2017. Since all the real estate transactions in the U.S. are publicly available, we will close the competition (no longer accepting submissions) before the evaluation period begins.\n",
+        "\n",
+        "### Train/Test split\n",
+        "- You are provided with a full list of real estate properties in three counties (Los Angeles, Orange and Ventura, California), with data from 2016.\n",
+        "- The train data has all the transactions before October 15, 2016, plus some of the transactions after October 15, 2016.\n",
+        "- The test data in the public leaderboard has the rest of the transactions between October 15 and December 31, 2016.\n",
+        "- The rest of the test data, which is used for calculating the private leaderboard, is all the properties from October 15, 2017, to December 15, 2017. This period is called the \"sales tracking period\", during which we will not be taking any submissions.\n",
+        "- You are asked to predict 6 time points for all properties: October 2016 (201610), November 2016 (201611), December 2016 (201612), October 2017 (201710), November 2017 (201711), and December 2017 (201712).\n",
+        "- Not all the properties are sold in each time period. If a property was not sold in a certain time period, that particular row will be ignored when calculating your score.\n",
+        "- If a property is sold multiple times within 31 days, we take the first reasonable value as the ground truth. By \"reasonable\", we mean if the data seems wrong, we will take the transaction that has a value that makes more sense.\n",
+        "### File descriptions\n",
+        "- `properties_2016.csv` - all the properties with their home features for 2016. Note: Some new 2017 properties don't have any data yet except for their `parcelid` values. Those data points should be populated when `properties_2017.csv` is available.\n",
+        "- `properties_2017.csv` - all the properties with their home features for 2017 (released on 10/2/2017)\n",
+        "- `train_2016.csv` - the training set with transactions from 1/1/2016 to 12/31/2016\n",
+        "- `train_2017.csv` - the training set with transactions from 1/1/2017 to 9/15/2017 (released on 10/2/2017)\n",
+        "- `sample_submission.csv` - a sample submission file in the correct format"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "R0yrYUf7anN0",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": 0,
+      "outputs": []
     }
   ]
 }
\ No newline at end of file

From 189711a1a39f4cde39b1c2aa9280d0a039c614eb Mon Sep 17 00:00:00 2001
From: gumdropsteve <warobson@gmail.com>
Date: Mon, 9 Sep 2019 08:13:24 -0700
Subject: [PATCH 4/7] run through as is; data string conversion and current
 .filter workaround issues displayed/labeled; removed rapids and kaggle
 install logs (first few cell outputs) for readability

---
 .../zillow_kaggle_zestimate_comp.ipynb        | 1077 ++++++++---------
 1 file changed, 533 insertions(+), 544 deletions(-)

diff --git a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
index f05586f5..c8d68291 100644
--- a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
+++ b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
@@ -43,11 +43,11 @@
       "metadata": {
         "id": "W-um5d-x7o46",
         "colab_type": "code",
-        "outputId": "a3d473ea-3028-49fb-b769-c78616b388ae",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 312
-        }
+        },
+        "outputId": "a604e66b-95d7-44fb-f8d3-848fcedaf796"
       },
       "source": [
         "\"\"\"make sure we have the right GPU\n",
@@ -61,7 +61,7 @@
         {
           "output_type": "stream",
           "text": [
-            "Thu Sep  5 06:04:00 2019       \n",
+            "Mon Sep  9 14:17:51 2019       \n",
             "+-----------------------------------------------------------------------------+\n",
             "| NVIDIA-SMI 430.40       Driver Version: 418.67       CUDA Version: 10.1     |\n",
             "|-------------------------------+----------------------+----------------------+\n",
@@ -69,7 +69,7 @@
             "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
             "|===============================+======================+======================|\n",
             "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
-            "| N/A   39C    P8    10W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
+            "| N/A   68C    P0    28W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
             "+-------------------------------+----------------------+----------------------+\n",
             "                                                                               \n",
             "+-----------------------------------------------------------------------------+\n",
@@ -98,11 +98,7 @@
       "metadata": {
         "id": "p129YxxnihcV",
         "colab_type": "code",
-        "outputId": "ce0d1990-45c5-4c91-d1f2-86cedd666bbc",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000
-        }
+        "colab": {}
       },
       "source": [
         "!wget -nc https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
@@ -115,257 +111,8 @@
         "os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'\n",
         "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
       ],
-      "execution_count": 2,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "--2019-09-05 06:04:07--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
-            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
-            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 1609 (1.6K) [text/plain]\n",
-            "Saving to: ‘rapids-colab.sh’\n",
-            "\n",
-            "\rrapids-colab.sh       0%[                    ]       0  --.-KB/s               \rrapids-colab.sh     100%[===================>]   1.57K  --.-KB/s    in 0s      \n",
-            "\n",
-            "2019-09-05 06:04:08 (510 MB/s) - ‘rapids-colab.sh’ saved [1609/1609]\n",
-            "\n",
-            "--2019-09-05 06:04:09--  https://github.com/rapidsai/notebooks-extended/raw/master/utils/env-check.py\n",
-            "Resolving github.com (github.com)... 13.114.40.48\n",
-            "Connecting to github.com (github.com)|13.114.40.48|:443... connected.\n",
-            "HTTP request sent, awaiting response... 301 Moved Permanently\n",
-            "Location: https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py [following]\n",
-            "--2019-09-05 06:04:09--  https://github.com/rapidsai/notebooks-contrib/raw/master/utils/env-check.py\n",
-            "Reusing existing connection to github.com:443.\n",
-            "HTTP request sent, awaiting response... 302 Found\n",
-            "Location: https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py [following]\n",
-            "--2019-09-05 06:04:10--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/env-check.py\n",
-            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...\n",
-            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 783 [text/plain]\n",
-            "Saving to: ‘env-check.py’\n",
-            "\n",
-            "env-check.py        100%[===================>]     783  --.-KB/s    in 0s      \n",
-            "\n",
-            "2019-09-05 06:04:10 (162 MB/s) - ‘env-check.py’ saved [783/783]\n",
-            "\n",
-            "Checking for GPU type:\n",
-            "*********************************************\n",
-            "Woo! Your instance has the right kind of GPU!\n",
-            "*********************************************\n",
-            "\n",
-            "Removing conflicting packages, will replace with RAPIDS compatible versions\n",
-            "Uninstalling xgboost-0.90:\n",
-            "  Successfully uninstalled xgboost-0.90\n",
-            "Uninstalling dask-1.1.5:\n",
-            "  Successfully uninstalled dask-1.1.5\n",
-            "Uninstalling distributed-1.25.3:\n",
-            "  Successfully uninstalled distributed-1.25.3\n",
-            "Installing conda\n",
-            "--2019-09-05 06:04:14--  https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
-            "Resolving repo.continuum.io (repo.continuum.io)... 104.18.200.79, 104.18.201.79, 2606:4700::6812:c94f, ...\n",
-            "Connecting to repo.continuum.io (repo.continuum.io)|104.18.200.79|:443... connected.\n",
-            "HTTP request sent, awaiting response... 200 OK\n",
-            "Length: 58468498 (56M) [application/x-sh]\n",
-            "Saving to: ‘Miniconda3-4.5.4-Linux-x86_64.sh’\n",
-            "\n",
-            "Miniconda3-4.5.4-Li 100%[===================>]  55.76M  65.1MB/s    in 0.9s    \n",
-            "\n",
-            "2019-09-05 06:04:15 (65.1 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]\n",
-            "\n",
-            "PREFIX=/usr/local\n",
-            "installing: python-3.6.5-hc3d631a_2 ...\n",
-            "Python 3.6.5 :: Anaconda, Inc.\n",
-            "installing: ca-certificates-2018.03.07-0 ...\n",
-            "installing: conda-env-2.6.0-h36134e3_1 ...\n",
-            "installing: libgcc-ng-7.2.0-hdf63c60_3 ...\n",
-            "installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...\n",
-            "installing: libffi-3.2.1-hd88cf55_4 ...\n",
-            "installing: ncurses-6.1-hf484d3e_0 ...\n",
-            "installing: openssl-1.0.2o-h20670df_0 ...\n",
-            "installing: tk-8.6.7-hc745277_3 ...\n",
-            "installing: xz-5.2.4-h14c3975_4 ...\n",
-            "installing: yaml-0.1.7-had09818_2 ...\n",
-            "installing: zlib-1.2.11-ha838bed_2 ...\n",
-            "installing: libedit-3.1.20170329-h6b74fdf_2 ...\n",
-            "installing: readline-7.0-ha6073c6_4 ...\n",
-            "installing: sqlite-3.23.1-he433501_0 ...\n",
-            "installing: asn1crypto-0.24.0-py36_0 ...\n",
-            "installing: certifi-2018.4.16-py36_0 ...\n",
-            "installing: chardet-3.0.4-py36h0f667ec_1 ...\n",
-            "installing: idna-2.6-py36h82fb2a8_1 ...\n",
-            "installing: pycosat-0.6.3-py36h0a5515d_0 ...\n",
-            "installing: pycparser-2.18-py36hf9f622e_1 ...\n",
-            "installing: pysocks-1.6.8-py36_0 ...\n",
-            "installing: ruamel_yaml-0.15.37-py36h14c3975_2 ...\n",
-            "installing: six-1.11.0-py36h372c433_1 ...\n",
-            "installing: cffi-1.11.5-py36h9745a5d_0 ...\n",
-            "installing: setuptools-39.2.0-py36_0 ...\n",
-            "installing: cryptography-2.2.2-py36h14c3975_0 ...\n",
-            "installing: wheel-0.31.1-py36_0 ...\n",
-            "installing: pip-10.0.1-py36_0 ...\n",
-            "installing: pyopenssl-18.0.0-py36_0 ...\n",
-            "installing: urllib3-1.22-py36hbe7ace6_0 ...\n",
-            "installing: requests-2.18.4-py36he2e5f8d_1 ...\n",
-            "installing: conda-4.5.4-py36_0 ...\n",
-            "installation finished.\n",
-            "WARNING:\n",
-            "    You currently have a PYTHONPATH environment variable set. This may cause\n",
-            "    unexpected behavior when running the Python interpreter in Miniconda3.\n",
-            "    For best results, please verify that your PYTHONPATH only points to\n",
-            "    directories of packages that are compatible with the Python interpreter\n",
-            "    in Miniconda3: /usr/local\n",
-            "Installing RAPIDS 0.10 packages\n",
-            "Please standby, this will take a few minutes...\n",
-            "\n",
-            "\n",
-            "==> WARNING: A newer version of conda exists. <==\n",
-            "  current version: 4.5.4\n",
-            "  latest version: 4.7.11\n",
-            "\n",
-            "Please update conda by running\n",
-            "\n",
-            "    $ conda update -n base conda\n",
-            "\n",
-            "\n",
-            "dask-cuda-0.10.0a    |  921 KB | : 100% 1.0/1 [00:02<00:00,  2.81s/it]               \n",
-            "jpeg-9c              |  251 KB | : 100% 1.0/1 [00:00<00:00,  8.31it/s]\n",
-            "ca-certificates-2019 |  145 KB | : 100% 1.0/1 [00:00<00:00, 10.93it/s]\n",
-            "joblib-0.13.2        |  180 KB | : 100% 1.0/1 [00:00<00:00,  7.30it/s]\n",
-            "blinker-1.4          |   13 KB | : 100% 1.0/1 [00:00<00:00, 13.37it/s]\n",
-            "dask-core-2.3.0      |  574 KB | : 100% 1.0/1 [00:00<00:00,  4.16it/s]              \n",
-            "cudf-0.10.0a         |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.74s/it]               \n",
-            "pyasn1-modules-0.2.6 |   47 KB | : 100% 1.0/1 [00:00<00:00, 10.35it/s]\n",
-            "jinja2-2.10.1        |   91 KB | : 100% 1.0/1 [00:00<00:00, 11.34it/s]\n",
-            "grpc-cpp-1.23.0      |  4.5 MB | : 100% 1.0/1 [00:01<00:00,  1.05s/it]               \n",
-            "boost-cpp-1.70.0     | 21.1 MB | : 100% 1.0/1 [00:08<00:00,  8.53s/it]               \n",
-            "idna-2.8             |  132 KB | : 100% 1.0/1 [00:00<00:00, 11.10it/s]\n",
-            "numba-0.45.1         |  3.1 MB | : 100% 1.0/1 [00:00<00:00,  1.04it/s]               \n",
-            "numpy-1.17.1         |  5.2 MB | : 100% 1.0/1 [00:01<00:00,  1.13s/it]               \n",
-            "yaml-0.1.7           |   78 KB | : 100% 1.0/1 [00:00<00:00, 12.23it/s]\n",
-            "click-7.0            |   61 KB | : 100% 1.0/1 [00:00<00:00, 12.19it/s]\n",
-            "python-dateutil-2.8. |  219 KB | : 100% 1.0/1 [00:00<00:00, 11.76it/s]\n",
-            "google-auth-1.6.3    |   45 KB | : 100% 1.0/1 [00:00<00:00, 11.44it/s]\n",
-            "gcsfs-0.3.0          |   19 KB | : 100% 1.0/1 [00:00<00:00, 15.31it/s]\n",
-            "tk-8.6.9             |  3.2 MB | : 100% 1.0/1 [00:00<00:00,  1.57it/s]               \n",
-            "pytz-2019.2          |  228 KB | : 100% 1.0/1 [00:00<00:00,  4.04it/s]              \n",
-            "pip-19.2.3           |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  1.82it/s]             \n",
-            "cachetools-2.1.0     |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.55it/s]\n",
-            "zict-1.0.0           |   10 KB | : 100% 1.0/1 [00:00<00:00, 14.96it/s]\n",
-            "cloudpickle-1.2.1    |   22 KB | : 100% 1.0/1 [00:00<00:00, 14.84it/s]\n",
-            "dask-cudf-0.10.0a    |   62 KB | : 100% 1.0/1 [00:01<00:00,  1.34s/it] \n",
-            "libcudf-0.10.0a      | 26.0 MB | : 100% 1.0/1 [00:07<00:00,  7.09s/it]               \n",
-            "pillow-6.1.0         |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.42it/s]               \n",
-            "libcumlprims-0.9.0   |  3.9 MB | : 100% 1.0/1 [00:02<00:00,  2.24s/it]                \n",
-            "cytoolz-0.10.0       |  429 KB | : 100% 1.0/1 [00:00<00:00,  6.44it/s]               \n",
-            "requests-oauthlib-1. |   19 KB | : 100% 1.0/1 [00:00<00:00, 14.79it/s]\n",
-            "six-1.12.0           |   22 KB | : 100% 1.0/1 [00:00<00:00, 13.72it/s]\n",
-            "bzip2-1.0.8          |  396 KB | : 100% 1.0/1 [00:00<00:00,  7.69it/s]               \n",
-            "llvmlite-0.29.0      | 19.9 MB | : 100% 1.0/1 [00:03<00:00,  3.15s/it]               \n",
-            "re2-2019.09.01       |  431 KB | : 100% 1.0/1 [00:00<00:00,  7.14it/s]               \n",
-            "zstd-1.4.0           |  928 KB | : 100% 1.0/1 [00:00<00:00,  4.81it/s]               \n",
-            "pycparser-2.19       |  173 KB | : 100% 1.0/1 [00:00<00:00,  9.89it/s]\n",
-            "urllib3-1.25.3       |  187 KB | : 100% 1.0/1 [00:00<00:00,  7.69it/s]\n",
-            "uriparser-0.9.3      |   49 KB | : 100% 1.0/1 [00:00<00:00, 10.99it/s]\n",
-            "gflags-2.2.2         |  177 KB | : 100% 1.0/1 [00:00<00:00,  9.88it/s]\n",
-            "libpng-1.6.37        |  343 KB | : 100% 1.0/1 [00:00<00:00,  8.51it/s]               \n",
-            "certifi-2019.6.16    |  149 KB | : 100% 1.0/1 [00:00<00:00, 12.26it/s]\n",
-            "libcblas-3.8.0       |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.55it/s]\n",
-            "_libgcc_mutex-0.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 18.61it/s]\n",
-            "psutil-5.6.3         |  322 KB | : 100% 1.0/1 [00:00<00:00,  7.35it/s]               \n",
-            "lz4-c-1.8.3          |  187 KB | : 100% 1.0/1 [00:00<00:00,  9.43it/s]\n",
-            "zlib-1.2.11          |  105 KB | : 100% 1.0/1 [00:00<00:00, 11.30it/s]\n",
-            "fsspec-0.4.4         |   39 KB | : 100% 1.0/1 [00:00<00:00, 13.92it/s]\n",
-            "thrift-cpp-0.12.0    |  2.4 MB | : 100% 1.0/1 [00:00<00:00,  2.11it/s]               \n",
-            "double-conversion-3. |   85 KB | : 100% 1.0/1 [00:00<00:00, 11.97it/s]\n",
-            "heapdict-1.0.0       |    7 KB | : 100% 1.0/1 [00:00<00:00, 16.08it/s]\n",
-            "libffi-3.2.1         |   46 KB | : 100% 1.0/1 [00:00<00:00, 13.59it/s]\n",
-            "chardet-3.0.4        |  190 KB | : 100% 1.0/1 [00:00<00:00,  8.67it/s]               \n",
-            "pynvml-8.0.3         |   30 KB | : 100% 1.0/1 [00:00<00:00,  2.88it/s]               \n",
-            "bokeh-1.3.4          |  4.0 MB | : 100% 1.0/1 [00:01<00:00,  1.30s/it]               \n",
-            "freetype-2.10.0      |  884 KB | : 100% 1.0/1 [00:00<00:00,  4.89it/s]               \n",
-            "nvstrings-0.10.0a    |  124 KB | : 100% 1.0/1 [00:01<00:00,  1.37s/it]               \n",
-            "libxgboost-0.90.rapi | 32.8 MB | : 100% 1.0/1 [00:09<00:00,  9.67s/it]               \n",
-            "pyasn1-0.4.6         |   52 KB | : 100% 1.0/1 [00:00<00:00, 12.03it/s]\n",
-            "brotli-1.0.7         |  1.0 MB | : 100% 1.0/1 [00:00<00:00,  4.92it/s]               \n",
-            "setuptools-41.2.0    |  634 KB | : 100% 1.0/1 [00:00<00:00,  4.53it/s]               \n",
-            "wheel-0.33.6         |   35 KB | : 100% 1.0/1 [00:00<00:00, 13.26it/s]\n",
-            "libgcc-ng-9.1.0      |  8.1 MB | : 100% 1.0/1 [00:01<00:00,  1.23s/it]               \n",
-            "libcuml-0.10.0a      | 29.3 MB | : 100% 1.0/1 [00:10<00:00, 10.22s/it]               \n",
-            "dlpack-0.2           |   12 KB | : 100% 1.0/1 [00:00<00:00,  1.39it/s] \n",
-            "pandas-0.24.2        | 11.1 MB | : 100% 1.0/1 [00:02<00:00,  2.32s/it]               \n",
-            "dask-cuml-0.8.0a     |   30 KB | : 100% 1.0/1 [00:01<00:00,  1.14s/it] \n",
-            "sqlite-3.29.0        |  1.9 MB | : 100% 1.0/1 [00:00<00:00,  2.74it/s]               \n",
-            "libgfortran-ng-7.3.0 |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  3.61it/s]               \n",
-            "toolz-0.10.0         |   46 KB | : 100% 1.0/1 [00:00<00:00, 11.64it/s]\n",
-            "asn1crypto-0.24.0    |  154 KB | : 100% 1.0/1 [00:00<00:00,  9.53it/s]\n",
-            "liblapack-3.8.0      |   10 KB | : 100% 1.0/1 [00:00<00:00, 15.48it/s]\n",
-            "packaging-19.0       |   23 KB | : 100% 1.0/1 [00:00<00:00,  3.70it/s]               \n",
-            "cryptography-2.7     |  607 KB | : 100% 1.0/1 [00:00<00:00,  3.62it/s]               \n",
-            "olefile-0.46         |   31 KB | : 100% 1.0/1 [00:00<00:00, 15.17it/s]\n",
-            "libopenblas-0.3.7    |  7.6 MB | : 100% 1.0/1 [00:01<00:00,  1.29s/it]               \n",
-            "libtiff-4.0.10       |  587 KB | : 100% 1.0/1 [00:00<00:00,  6.35it/s]               \n",
-            "cffi-1.12.3          |  218 KB | : 100% 1.0/1 [00:00<00:00,  8.74it/s]\n",
-            "ncurses-6.1          |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  1.19it/s]               \n",
-            "rmm-0.10.0a          |   14 KB | : 100% 1.0/1 [00:00<00:00,  1.98it/s] \n",
-            "libprotobuf-3.8.0    |  4.7 MB | : 100% 1.0/1 [00:01<00:00,  1.71s/it]               \n",
-            "pyopenssl-19.0.0     |   81 KB | : 100% 1.0/1 [00:00<00:00, 12.69it/s]\n",
-            "libevent-2.1.10      |  1.3 MB | : 100% 1.0/1 [00:00<00:00,  2.66it/s]              \n",
-            "librmm-0.10.0a       |   44 KB | : 100% 1.0/1 [00:00<00:00,  1.99it/s]               \n",
-            "scipy-1.3.1          | 18.1 MB | : 100% 1.0/1 [00:03<00:00,  3.18s/it]               \n",
-            "readline-8.0         |  441 KB | : 100% 1.0/1 [00:00<00:00,  6.79it/s]               \n",
-            "msgpack-python-0.6.1 |   89 KB | : 100% 1.0/1 [00:00<00:00, 13.43it/s]\n",
-            "requests-2.22.0      |   84 KB | : 100% 1.0/1 [00:00<00:00,  1.49it/s]\n",
-            "py-xgboost-0.90.rapi |   86 KB | : 100% 1.0/1 [00:00<00:00,  1.41it/s]               \n",
-            "cuml-0.10.0a         |  5.9 MB | : 100% 1.0/1 [00:02<00:00,  2.12s/it]               \n",
-            "libblas-3.8.0        |   10 KB | : 100% 1.0/1 [00:00<00:00, 13.55it/s]\n",
-            "c-ares-1.15.0        |  100 KB | : 100% 1.0/1 [00:00<00:00, 12.24it/s]\n",
-            "glog-0.4.0           |  104 KB | : 100% 1.0/1 [00:00<00:00, 12.09it/s]\n",
-            "pyarrow-0.14.1       |  2.8 MB | : 100% 1.0/1 [00:00<00:00,  1.27it/s]               \n",
-            "xz-5.2.4             |  366 KB | : 100% 1.0/1 [00:00<00:00,  7.05it/s]               \n",
-            "arrow-cpp-0.14.1     | 17.3 MB | : 100% 1.0/1 [00:02<00:00,  2.84s/it]               \n",
-            "icu-64.2             | 12.6 MB | : 100% 1.0/1 [00:01<00:00,  1.93s/it]               \n",
-            "distributed-2.3.2    |  370 KB | : 100% 1.0/1 [00:00<00:00,  5.21it/s]               \n",
-            "xgboost-0.90.rapidsd |   11 KB | : 100% 1.0/1 [00:01<00:00,  1.01s/it] \n",
-            "locket-0.2.0         |    6 KB | : 100% 1.0/1 [00:00<00:00, 15.02it/s]\n",
-            "snappy-1.1.7         |   39 KB | : 100% 1.0/1 [00:00<00:00, 14.68it/s]\n",
-            "pyjwt-1.7.1          |   17 KB | : 100% 1.0/1 [00:00<00:00, 13.20it/s]\n",
-            "libstdcxx-ng-9.1.0   |  4.0 MB | : 100% 1.0/1 [00:00<00:00,  1.61it/s]               \n",
-            "pysocks-1.7.0        |   26 KB | : 100% 1.0/1 [00:00<00:00, 15.49it/s]\n",
-            "dask-2.3.0           |    4 KB | : 100% 1.0/1 [00:00<00:00, 15.11it/s]\n",
-            "sortedcontainers-2.1 |   25 KB | : 100% 1.0/1 [00:00<00:00, 14.59it/s]\n",
-            "parquet-cpp-1.5.1    |    3 KB | : 100% 1.0/1 [00:00<00:00, 14.74it/s]\n",
-            "nccl-2.4.6.1         | 66.6 MB | : 100% 1.0/1 [00:12<00:00, 12.58s/it]               \n",
-            "google-auth-oauthlib |   18 KB | : 100% 1.0/1 [00:00<00:00, 13.18it/s]\n",
-            "cugraph-0.10.0a      |  1.3 MB | : 100% 1.0/1 [00:10<00:00,  3.84s/it]              \n",
-            "libcugraph-0.10.0a   | 11.3 MB | : 100% 1.0/1 [00:18<00:00, 18.20s/it]               \n",
-            "python-3.6.7         | 34.6 MB | : 100% 1.0/1 [00:05<00:00,  5.00s/it]               \n",
-            "openssl-1.1.1c       |  2.1 MB | : 100% 1.0/1 [00:00<00:00,  2.56it/s]               \n",
-            "tornado-6.0.3        |  636 KB | : 100% 1.0/1 [00:00<00:00,  4.46it/s]              \n",
-            "partd-1.0.0          |   16 KB | : 100% 1.0/1 [00:00<00:00, 13.42it/s]\n",
-            "markupsafe-1.1.1     |   26 KB | : 100% 1.0/1 [00:00<00:00, 14.08it/s]\n",
-            "fastavro-0.22.4      |  405 KB | : 100% 1.0/1 [00:00<00:00,  7.21it/s]               \n",
-            "cython-0.29.13       |  2.2 MB | : 100% 1.0/1 [00:00<00:00,  1.86it/s]               \n",
-            "rsa-3.4.2            |   31 KB | : 100% 1.0/1 [00:00<00:00, 13.62it/s]\n",
-            "pyyaml-5.1.2         |  184 KB | : 100% 1.0/1 [00:00<00:00, 10.32it/s]\n",
-            "scikit-learn-0.21.3  |  6.7 MB | : 100% 1.0/1 [00:01<00:00,  1.44s/it]               \n",
-            "decorator-4.4.0      |   11 KB | : 100% 1.0/1 [00:00<00:00, 15.90it/s]\n",
-            "oauthlib-3.0.1       |   82 KB | : 100% 1.0/1 [00:00<00:00,  9.79it/s]\n",
-            "pyparsing-2.4.2      |   57 KB | : 100% 1.0/1 [00:00<00:00, 13.40it/s]\n",
-            "tblib-1.4.0          |   12 KB | : 100% 1.0/1 [00:00<00:00, 15.25it/s]\n",
-            "cudatoolkit-10.0.130 | 380.0 MB | : 100% 1.0/1 [00:46<00:00, 46.86s/it]                \n",
-            "libnvstrings-0.10.0a | 24.8 MB | : 100% 1.0/1 [00:07<00:00,  7.82s/it]               \n",
-            "Copying shared object files to /usr/lib\n",
-            "\n",
-            "*********************************************\n",
-            "Your Google Colab instance is RAPIDS ready!\n",
-            "*********************************************\n"
-          ],
-          "name": "stdout"
-        }
-      ]
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -386,18 +133,15 @@
       "metadata": {
         "id": "x1dLRTm168Tk",
         "colab_type": "code",
-        "outputId": "406a519a-e019-46cf-f0bf-7bba3dd2bb79",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 1000
-        }
+        "colab": {}
       },
       "source": [
+        "# SECURITY: never commit a real Kaggle API key to this notebook; keep it in a private kaggle.json and paste it only at runtime\n",
         "# Info on how to get your api key (kaggle.json) here: https://github.com/Kaggle/kaggle-api#api-credentials\n",
         "!pip install kaggle\n",
         "!mkdir /root/.kaggle\n",
         "# plug api -- get your own API key\n",
-        "!echo '{\"username\":\"warobson\",\"key\":\"5b4ecdb3cb122fb692a8349124960424\"}' > /root/.kaggle/kaggle.json\n",
+        "!echo '{\"username\":\"warobson\",\"key\":\"\"}' > /root/.kaggle/kaggle.json\n",
         "!chmod 600 /root/.kaggle/kaggle.json\n",
         "# !kaggle datasets download\n",
         "!kaggle competitions download -c zillow-prize-1\n",
@@ -409,87 +153,8 @@
         "!unzip -q \"/content/train_2017.csv.zip\"\n",
         "!unzip -q \"/content/properties_2017.csv.zip\""
       ],
-      "execution_count": 3,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "Collecting kaggle\n",
-            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e9/fc/0de659ea1f2096563204925b6660ae141f3d85bbe9e8a1571c3eb6cc1fdd/kaggle-1.5.5.tar.gz (56kB)\n",
-            "\u001b[K     |████████████████████████████████| 61kB 31.1MB/s \n",
-            "\u001b[?25hCollecting urllib3<1.25,>=1.21.1 (from kaggle)\n",
-            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/01/11/525b02e4acc0c747de8b6ccdab376331597c569c42ea66ab0a1dbd36eca2/urllib3-1.24.3-py2.py3-none-any.whl (118kB)\n",
-            "\u001b[K     |████████████████████████████████| 122kB 35.9MB/s \n",
-            "\u001b[?25hRequirement already satisfied: six>=1.10 in /usr/local/lib/python3.6/site-packages (from kaggle) (1.12.0)\n",
-            "Requirement already satisfied: certifi in /usr/local/lib/python3.6/site-packages (from kaggle) (2019.6.16)\n",
-            "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/site-packages (from kaggle) (2.8.0)\n",
-            "Requirement already satisfied: requests in /usr/local/lib/python3.6/site-packages (from kaggle) (2.22.0)\n",
-            "Collecting tqdm (from kaggle)\n",
-            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/dc/88/d3213e2f3492daf09d8b41631ad6899f56db17ce83ea9c8a579902bafe5e/tqdm-4.35.0-py2.py3-none-any.whl (50kB)\n",
-            "\u001b[K     |████████████████████████████████| 51kB 29.7MB/s \n",
-            "\u001b[?25hCollecting python-slugify (from kaggle)\n",
-            "  Downloading https://files.pythonhosted.org/packages/a2/5d/bd30413c00bbed3945558aca07c55944073e1e30abeee1f06515281f9811/python-slugify-3.0.3.tar.gz\n",
-            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (2.8)\n",
-            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/site-packages (from requests->kaggle) (3.0.4)\n",
-            "Collecting text-unidecode==1.2 (from python-slugify->kaggle)\n",
-            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/79/42/d717cc2b4520fb09e45b344b1b0b4e81aa672001dd128c180fabc655c341/text_unidecode-1.2-py2.py3-none-any.whl (77kB)\n",
-            "\u001b[K     |████████████████████████████████| 81kB 32.1MB/s \n",
-            "\u001b[?25hBuilding wheels for collected packages: kaggle, python-slugify\n",
-            "  Building wheel for kaggle (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for kaggle: filename=kaggle-1.5.5-cp36-none-any.whl size=71896 sha256=d9815b0d9eae6d3594e8dc1a57a33174b1dbe24f623e1d688a92a2588f4e1be0\n",
-            "  Stored in directory: /root/.cache/pip/wheels/db/6a/80/6cd1892eb9b9b136333db3c74e16cba4e17e2c700f51541f06\n",
-            "  Building wheel for python-slugify (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for python-slugify: filename=python_slugify-3.0.3-py2.py3-none-any.whl size=4789 sha256=ccca227a48fbd1c2f5ba45701b0b52f1a12b7d1484ba459889128b3712c17b88\n",
-            "  Stored in directory: /root/.cache/pip/wheels/0f/96/ca/85f5b01165975402d1e37f8dd346df00dc39be1d0761bd17bb\n",
-            "Successfully built kaggle python-slugify\n",
-            "Installing collected packages: urllib3, tqdm, text-unidecode, python-slugify, kaggle\n",
-            "  Found existing installation: urllib3 1.25.3\n",
-            "    Uninstalling urllib3-1.25.3:\n",
-            "      Successfully uninstalled urllib3-1.25.3\n",
-            "Successfully installed kaggle-1.5.5 python-slugify-3.0.3 text-unidecode-1.2 tqdm-4.35.0 urllib3-1.24.3\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "display_data",
-          "data": {
-            "application/vnd.colab-display-data+json": {
-              "pip_warning": {
-                "packages": [
-                  "urllib3"
-                ]
-              }
-            }
-          },
-          "metadata": {
-            "tags": []
-          }
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "Downloading sample_submission.csv.zip to /content\n",
-            " 51% 5.00M/9.86M [00:00<00:00, 15.9MB/s]\n",
-            "100% 9.86M/9.86M [00:00<00:00, 29.2MB/s]\n",
-            "Downloading properties_2016.csv.zip to /content\n",
-            " 91% 145M/159M [00:02<00:00, 54.3MB/s]\n",
-            "100% 159M/159M [00:02<00:00, 59.1MB/s]\n",
-            "Downloading zillow_data_dictionary.xlsx.zip to /content\n",
-            "  0% 0.00/15.7k [00:00<?, ?B/s]\n",
-            "100% 15.7k/15.7k [00:00<00:00, 14.5MB/s]\n",
-            "Downloading train_2016_v2.csv.zip to /content\n",
-            "  0% 0.00/632k [00:00<?, ?B/s]\n",
-            "100% 632k/632k [00:00<00:00, 212MB/s]\n",
-            "Downloading train_2017.csv.zip to /content\n",
-            "  0% 0.00/825k [00:00<?, ?B/s]\n",
-            "100% 825k/825k [00:00<00:00, 225MB/s]\n",
-            "Downloading properties_2017.csv.zip to /content\n",
-            " 93% 129M/138M [00:05<00:00, 20.1MB/s]\n",
-            "100% 138M/138M [00:05<00:00, 26.7MB/s]\n"
-          ],
-          "name": "stdout"
-        }
-      ]
+      "execution_count": 0,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -507,7 +172,7 @@
       "metadata": {
         "id": "6n75DyJ-dm4B",
         "colab_type": "code",
-        "outputId": "0b450180-fedb-4251-f106-08dad9ee50f1",
+        "outputId": "64ac687e-39d6-4bb1-f4b7-5476c9de3b84",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 173
@@ -590,32 +255,33 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "uynoUxpx8Xsn",
+        "id": "2EfApIzCfEtr",
         "colab_type": "code",
-        "outputId": "8b688642-b7d0-4785-b428-4c5130419e17",
+        "outputId": "bc1e37d1-9ab8-4561-fa39-5af420480a72",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 121
+          "height": 156
         }
       },
       "source": [
-        "# import train 2016  data\n",
-        "train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n",
-        "                          parse_dates=[\"transactiondate\"])\n",
-        "# peek display 2016 train\n",
-        "print(train2016.head())"
+        "# import 2016 properties\n",
+        "prop2016 = cudf.read_csv('/content/properties_2016.csv')\n",
+        "# peek display 2016 properties\n",
+        "print(prop2016.head())"
       ],
-      "execution_count": 6,
+      "execution_count": 154,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "   parcelid  logerror transactiondate\n",
-            "0  11016594    0.0276      2016-01-01\n",
-            "1  14366692   -0.1684      2016-01-01\n",
-            "2  12098116   -0.0040      2016-01-01\n",
-            "3  12643413    0.0218      2016-01-02\n",
-            "4  14432541   -0.0050      2016-01-02\n"
+            "   parcelid airconditioningtypeid  ... taxdelinquencyyear censustractandblock\n",
+            "0  10754147                  null  ...               null                null\n",
+            "1  10759547                  null  ...               null                null\n",
+            "2  10843547                  null  ...               null                null\n",
+            "3  10859147                  null  ...               null                null\n",
+            "4  10879947                  null  ...               null                null\n",
+            "\n",
+            "[5 rows x 58 columns]\n"
           ],
           "name": "stdout"
         }
@@ -624,33 +290,32 @@
     {
       "cell_type": "code",
       "metadata": {
-        "id": "2EfApIzCfEtr",
+        "id": "uynoUxpx8Xsn",
         "colab_type": "code",
-        "outputId": "fc64b5dd-72a4-44d2-bfe4-2d5a6760a381",
+        "outputId": "b64b7b32-c1f9-4cf3-c50d-36e90dc51a64",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 156
+          "height": 121
         }
       },
       "source": [
-        "# import 2016 properties\n",
-        "prop2016 = cudf.read_csv('/content/properties_2016.csv')\n",
-        "# peek display 2016 properties\n",
-        "print(prop2016.head())"
+        "# import train 2016 data\n",
+        "train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n",
+        "                          parse_dates=[\"transactiondate\"])\n",
+        "# peek display 2016 train\n",
+        "print(train2016.head())"
       ],
-      "execution_count": 7,
+      "execution_count": 155,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "   parcelid airconditioningtypeid  ... taxdelinquencyyear censustractandblock\n",
-            "0  10754147                  null  ...               null                null\n",
-            "1  10759547                  null  ...               null                null\n",
-            "2  10843547                  null  ...               null                null\n",
-            "3  10859147                  null  ...               null                null\n",
-            "4  10879947                  null  ...               null                null\n",
-            "\n",
-            "[5 rows x 58 columns]\n"
+            "   parcelid  logerror transactiondate\n",
+            "0  11016594    0.0276      2016-01-01\n",
+            "1  14366692   -0.1684      2016-01-01\n",
+            "2  12098116   -0.0040      2016-01-01\n",
+            "3  12643413    0.0218      2016-01-02\n",
+            "4  14432541   -0.0050      2016-01-02\n"
           ],
           "name": "stdout"
         }
@@ -679,7 +344,7 @@
       "metadata": {
         "id": "o4CvSIcwm4B2",
         "colab_type": "code",
-        "outputId": "8857aa84-5f36-4e74-8600-afc8d29ef16a",
+        "outputId": "4e59a51a-ebd6-4fe5-b037-3165e57e3b85",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 156
@@ -733,7 +398,7 @@
         "# what's the data frame look like?\n",
         "print(df_train.head())"
       ],
-      "execution_count": 8,
+      "execution_count": 156,
       "outputs": [
         {
           "output_type": "stream",
@@ -804,13 +469,15 @@
       "metadata": {
         "id": "B3-1V93smA9A",
         "colab_type": "code",
-        "outputId": "61e3186a-d6f6-4b52-8459-a1052ccb35d3",
+        "outputId": "52e1a5d7-869a-443f-ac2d-40504992dc14",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 69
+          "height": 156
         }
       },
       "source": [
+        "print(f'before\\n{df_train.just_hottub_or_spa.value_counts()}\\n')\n",
+        "\n",
         "# if poolcnt=1 and has_hottub_or_spa=1 and just_hottub_or_spa is null\n",
         "conditions = ((df_train['pool_count'] == 1) \n",
         "              & (df_train['has_hottub_or_spa'] == 1) \n",
@@ -818,13 +485,18 @@
         "# then just_hottub_or_spa = 0\n",
         "df_train.just_hottub_or_spa.loc[conditions] = 0\n",
         "\n",
-        "print(df_train.just_hottub_or_spa.value_counts())"
+        "print(f'after\\n{df_train.just_hottub_or_spa.value_counts()}')\n"
       ],
-      "execution_count": 10,
+      "execution_count": 158,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
+            "before\n",
+            "1.0    1161\n",
+            "Name: just_hottub_or_spa, dtype: int32\n",
+            "\n",
+            "after\n",
             "0.0    1204\n",
             "1.0    1161\n",
             "Name: just_hottub_or_spa, dtype: int32\n"
@@ -885,7 +557,7 @@
       "metadata": {
         "id": "FBgs7zJm3qk-",
         "colab_type": "code",
-        "outputId": "93f2d4bf-264e-4f92-ddcb-5cda84c0e80e",
+        "outputId": "78c76ac5-2b7f-4f98-9615-8a335bc3214e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 69
@@ -900,7 +572,7 @@
         "\n",
         "print(df_train.just_hottub_or_spa.value_counts())"
       ],
-      "execution_count": 12,
+      "execution_count": 160,
       "outputs": [
         {
           "output_type": "stream",
@@ -985,7 +657,7 @@
       "metadata": {
         "id": "OZM6lXmmpj5k",
         "colab_type": "code",
-        "outputId": "989a44db-4064-4d82-e217-6c1b409871a4",
+        "outputId": "ecf62d1d-b036-41ad-8052-a3090ae590ef",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 52
@@ -995,7 +667,7 @@
         "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
         "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
-      "execution_count": 15,
+      "execution_count": 163,
       "outputs": [
         {
           "output_type": "stream",
@@ -1025,7 +697,7 @@
       "metadata": {
         "id": "i3YRZgU_qZhA",
         "colab_type": "code",
-        "outputId": "94e69d14-8a62-424f-e0ad-4598cec8bf49",
+        "outputId": "e45a7a96-2e1d-47d2-a0bd-48ece42cbb6e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 52
@@ -1049,7 +721,7 @@
         "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
         "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
       ],
-      "execution_count": 16,
+      "execution_count": 164,
       "outputs": [
         {
           "output_type": "stream",
@@ -1106,7 +778,7 @@
       "metadata": {
         "id": "gbbUIbwJ-ouS",
         "colab_type": "code",
-        "outputId": "aa91093c-b914-4a44-ebf9-6ff93d5ab5e9",
+        "outputId": "310a4cdf-01a0-4fc3-ed1b-0e2f5e668518",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 121
@@ -1117,7 +789,7 @@
         "conditions = (df_train.garagecarcnt > 0) & (df_train.garage_sqft == 0)\n",
         "print(df_train.loc[conditions][garage].head())"
       ],
-      "execution_count": 18,
+      "execution_count": 166,
       "outputs": [
         {
           "output_type": "stream",
@@ -1125,9 +797,9 @@
             "    garagecarcnt  garage_sqft\n",
             "16           2.0          0.0\n",
             "29           1.0          0.0\n",
-            "36           2.0          0.0\n",
-            "54           2.0          0.0\n",
-            "65           1.0          0.0\n"
+            "32           2.0          0.0\n",
+            "49           1.0          0.0\n",
+            "52           2.0          0.0\n"
           ],
           "name": "stdout"
         }
@@ -1244,7 +916,7 @@
       "metadata": {
         "id": "yHZH4rMNLfBA",
         "colab_type": "code",
-        "outputId": "53844d43-16cb-41f2-8684-5268848f1476",
+        "outputId": "97106bb4-10f2-49a9-f821-03a3972db136",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 208
@@ -1303,7 +975,7 @@
         "# let's see how out unit counts look\n",
         "print(df_train.unitcnt.value_counts())"
       ],
-      "execution_count": 22,
+      "execution_count": 170,
       "outputs": [
         {
           "output_type": "stream",
@@ -1396,17 +1068,17 @@
       "metadata": {
         "id": "8lYcO_T5XKNN",
         "colab_type": "code",
-        "outputId": "e7ff645b-ac87-4039-d135-db8fd49855da",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 311
-        }
+        },
+        "outputId": "596cfad3-890d-4241-b8b8-347673082a7f"
       },
       "source": [
         "# how we'd normally take care of this\n",
         "df_train['taxdelinquencyflag'].fillna(0)"
       ],
-      "execution_count": 0,
+      "execution_count": 172,
       "outputs": [
         {
           "output_type": "error",
@@ -1415,9 +1087,9 @@
           "traceback": [
             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
             "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-84-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1141\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1143\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1145\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/dataframe/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    717\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m         ):\n\u001b[0;32m--> 719\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    720\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m<ipython-input-172-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1165\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1166\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1167\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1169\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/column/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    720\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         ):\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
             "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
           ]
         }
@@ -1449,7 +1121,7 @@
       "metadata": {
         "id": "Svp6J0cJ5dL0",
         "colab_type": "code",
-        "outputId": "fd9373e8-8a1f-45a3-a7bd-387f0e678d2c",
+        "outputId": "03862711-e104-4954-bf9c-61bd51b3a9e3",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 69
@@ -1471,7 +1143,7 @@
         "# display values in tax delinquency flag column\n",
         "print(df_train['taxdelinquencyflag'].value_counts())"
       ],
-      "execution_count": 24,
+      "execution_count": 173,
       "outputs": [
         {
           "output_type": "stream",
@@ -1501,7 +1173,7 @@
       "metadata": {
         "id": "lHh95mAIMrMy",
         "colab_type": "code",
-        "outputId": "37c584b1-76a4-4df7-ca71-23a9d50f165a",
+        "outputId": "832c405d-d89f-4b85-d77d-7a6726a61907",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 69
@@ -1510,7 +1182,7 @@
       "source": [
         "print(df_train.taxdelinquencyflag.value_counts())"
       ],
-      "execution_count": 25,
+      "execution_count": 174,
       "outputs": [
         {
           "output_type": "stream",
@@ -1528,7 +1200,7 @@
       "metadata": {
         "id": "6Bic66I9LfGC",
         "colab_type": "code",
-        "outputId": "af959592-7a80-42ee-8fda-3e18d6b9c514",
+        "outputId": "baaa5387-bbd7-4242-a336-0b6b90606935",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 243
@@ -1552,7 +1224,7 @@
         "# what've we got? \n",
         "print(df_train.taxdelinquencyyear.value_counts())"
       ],
-      "execution_count": 26,
+      "execution_count": 175,
       "outputs": [
         {
           "output_type": "stream",
@@ -1597,8 +1269,8 @@
       },
       "source": [
         "# make a copy of dataframe at this point\n",
-        "# safe = df_train.copy()\n",
-        "df_train = safe.copy()"
+        "pre_string = df_train.copy()  # snapshot taken before the string-processing steps\n",
+        "# df_train = pre_string.copy()  # run this line instead to roll df_train back to the snapshot"
       ],
       "execution_count": 0,
       "outputs": []
@@ -1608,7 +1280,7 @@
       "metadata": {
         "id": "Sg0eN-K1QdZy",
         "colab_type": "code",
-        "outputId": "3cdd2ef6-fb68-46bd-a611-5dd3d9ff45d2",
+        "outputId": "a90de47f-5c88-4834-df44-75a9dedcd07c",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 489
@@ -1671,7 +1343,7 @@
         "\"\"\"\n",
         "print(df_train[['census_tractnumber', 'block_number']].head())"
       ],
-      "execution_count": 29,
+      "execution_count": 177,
       "outputs": [
         {
           "output_type": "stream",
@@ -1726,7 +1398,7 @@
       "metadata": {
         "id": "xhCosNpXvTVU",
         "colab_type": "code",
-        "outputId": "3f70a009-a211-46fd-ecc9-731be3d15fe1",
+        "outputId": "2d969756-decb-4912-94f6-19836eb0323a",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 86
@@ -1749,7 +1421,7 @@
         "# drop columns with more than 95% null values\n",
         "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
       ],
-      "execution_count": 30,
+      "execution_count": 178,
       "outputs": [
         {
           "output_type": "stream",
@@ -1792,7 +1464,7 @@
       "metadata": {
         "id": "yB2lzAyopS_S",
         "colab_type": "code",
-        "outputId": "06922e76-c61b-4212-afc5-4f3a51eaaa09",
+        "outputId": "db6c7add-5452-4535-8948-a426654851b7",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 225
@@ -1804,7 +1476,7 @@
         "# let's see what we've got\n",
         "print(df_train['unitcnt'].value_counts())"
       ],
-      "execution_count": 31,
+      "execution_count": 179,
       "outputs": [
         {
           "output_type": "stream",
@@ -1843,7 +1515,7 @@
       "metadata": {
         "id": "-icFDeLSoJwl",
         "colab_type": "code",
-        "outputId": "03d9a89b-6e21-4bba-ae75-aa229c744dcf",
+        "outputId": "b1ed39c3-3a14-4dc1-eb48-b3429da5cffe",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1862,7 +1534,7 @@
         "\n",
         "print(df_train.pool_sqft.isna().sum())"
       ],
-      "execution_count": 32,
+      "execution_count": 180,
       "outputs": [
         {
           "output_type": "stream",
@@ -1890,10 +1562,10 @@
       "metadata": {
         "id": "3pVABkZTYK9F",
         "colab_type": "code",
-        "outputId": "e926b021-7cbb-4acd-96c8-2a15fbe1afd7",
+        "outputId": "b5cb7ced-7458-4971-936c-b6e5d33bc126",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 86
+          "height": 173
         }
       },
       "source": [
@@ -1902,6 +1574,11 @@
         "#land_tax\n",
         "#total_property_tax_2016\n",
         "#2)recalculate total_parcel_tax = structure_tax + land_tax\n",
+        "print(df_train.total_property_tax_2016.isnull().sum())\n",
+        "print(df_train.structure_tax.isnull().sum())\n",
+        "print(df_train.total_parcel_tax.isnull().sum())\n",
+        "print(df_train.land_tax.isnull().sum())\n",
+        "print()\n",
         "\n",
         "# total_parcel_tax =structure_tax + land_tax\n",
         "#->structure_tax=total_parcel_tax -land_tax\n",
@@ -1920,11 +1597,16 @@
         "print(df_train.total_parcel_tax.isnull().sum())\n",
         "print(df_train.land_tax.isnull().sum())"
       ],
-      "execution_count": 33,
+      "execution_count": 181,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
+            "6\n",
+            "380\n",
+            "1\n",
+            "1\n",
+            "\n",
             "6\n",
             "380\n",
             "1\n",
@@ -1939,7 +1621,7 @@
       "metadata": {
         "id": "8SID48LOpYvu",
         "colab_type": "code",
-        "outputId": "842c0ccb-1710-4e73-f85a-599d5b27988b",
+        "outputId": "6d20a3ba-4360-4554-908d-f6d673aece12",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1950,7 +1632,7 @@
         "df_train = df_train.drop(['regionidcounty'], axis=1)\n",
         "df_train.shape"
       ],
-      "execution_count": 34,
+      "execution_count": 182,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1962,7 +1644,7 @@
           "metadata": {
             "tags": []
           },
-          "execution_count": 34
+          "execution_count": 182
         }
       ]
     },
@@ -1971,7 +1653,7 @@
       "metadata": {
         "id": "tWmM2J8_pkg1",
         "colab_type": "code",
-        "outputId": "e544a196-1e32-4d18-a6b5-98c49a57589e",
+        "outputId": "6362e07f-e363-4884-b0c5-9380b5fee956",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 34
@@ -1984,7 +1666,7 @@
         "df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0] = np.nan\n",
         "print(df_train.bedroomcnt.isnull().sum())"
       ],
-      "execution_count": 35,
+      "execution_count": 183,
       "outputs": [
         {
           "output_type": "stream",
@@ -2000,10 +1682,10 @@
       "metadata": {
         "id": "3qnP2L9LpmeJ",
         "colab_type": "code",
-        "outputId": "2e863b26-9267-45f2-f31f-3305f5577ce3",
+        "outputId": "c0eabce4-3232-4435-8733-779526f18c57",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 104
+          "height": 208
         }
       },
       "source": [
@@ -2014,6 +1696,12 @@
         "#                                bedroomcnt      1421\n",
         "#                              roomcnt           1416\n",
         "\n",
+        "print(df_train.total_bath.isna().sum())\n",
+        "print(df_train.full_bath.isnull().sum())\n",
+        "print(df_train.half_bath.isnull().sum())\n",
+        "print(df_train.bedroomcnt.isnull().sum())\n",
+        "print(df_train.roomcnt.isnull().sum())\n",
+        "print()\n",
         "\n",
         "# roomcnt = (full_bath + half_bath) + bedroomcnt\n",
         "# total_bath = fullbath+ 0.5(half_bath)\n",
@@ -2040,11 +1728,17 @@
         "print(df_train.bedroomcnt.isnull().sum())\n",
         "print(df_train.roomcnt.isnull().sum())"
       ],
-      "execution_count": 36,
+      "execution_count": 184,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
+            "1165\n",
+            "1182\n",
+            "1182\n",
+            "1421\n",
+            "69700\n",
+            "\n",
             "1165\n",
             "1182\n",
             "1182\n",
@@ -2074,7 +1768,7 @@
       "metadata": {
         "id": "IW4CG2InpolD",
         "colab_type": "code",
-        "outputId": "288444a4-d153-4624-c961-b3956092d87e",
+        "outputId": "02375307-54e2-432b-8b87-1397c73d56b2",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 260
@@ -2129,7 +1823,7 @@
         "print(f'AFTER\\n{df_train.numberofstories.value_counts()}\\n'\n",
         "      f'{df_train.numberofstories.isnull().sum()} remaining null values')"
       ],
-      "execution_count": 37,
+      "execution_count": 185,
       "outputs": [
         {
           "output_type": "stream",
@@ -2158,7 +1852,7 @@
       "metadata": {
         "id": "AHcMsDCxprd4",
         "colab_type": "code",
-        "outputId": "516954f4-d1d9-4876-e3a4-4545d865d9f6",
+        "outputId": "30481b2c-e035-4478-d62f-63e10a09c17e",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 295
@@ -2190,7 +1884,7 @@
         "print(f'AFTER\\n{df_train.fireplace_count.value_counts()}\\n'\n",
         "      f'{df_train.fireplace_count.isnull().sum()} remaining null values')"
       ],
-      "execution_count": 38,
+      "execution_count": 186,
       "outputs": [
         {
           "output_type": "stream",
@@ -2221,7 +1915,7 @@
       "metadata": {
         "id": "FIuSWoJspt3H",
         "colab_type": "code",
-        "outputId": "d8b6ef02-d214-4530-ce3a-c6efc5bd01cf",
+        "outputId": "cb11c3a1-1658-4bce-cbde-a1a47ccdc0a8",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 317
@@ -2241,7 +1935,7 @@
         "# display the graph\n",
         "plt.show()"
       ],
-      "execution_count": 41,
+      "execution_count": 187,
       "outputs": [
         {
           "output_type": "display_data",
@@ -2262,7 +1956,7 @@
       "metadata": {
         "id": "KOHPCFRSp5y9",
         "colab_type": "code",
-        "outputId": "471d6f7c-607a-4520-d219-3ab56500c004",
+        "outputId": "e0f3fe2e-a82a-49e8-a798-a3f79a30bcee",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 274
@@ -2274,12 +1968,12 @@
         "# display the graph\n",
         "plt.show()"
       ],
-      "execution_count": 42,
+      "execution_count": 188,
       "outputs": [
         {
           "output_type": "display_data",
           "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGxxJREFUeJzt3X1wFPXBB/Dv3p0XEuCa3CVAeLVO\nDY1TkTYOYVILEpDQCtiGmWJ5iQ4obS0WWhlFaosVBE+Uygga7EOLqAP/JEMr9CnY4cUChUJ9oXEo\noSFAZiAvdwlPCJLEu/09f8Cdebm73Mve7W5+389fuHv72+/u7X3vsrfeKkIIASIikopF7wBERJR6\nLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKS\nEMufiEhCNr0DtLRch6oKuFyD4PW26R0nLsyuD2ZPPbPmBvpPdotFQVbWwITH1L38VVVAVUXw32bF\n7Ppg9tQza26A2bviaR8iIgmx/ImIJMTyJyKSUFTl73a7UVxcjLFjx6K6uhoA0NLSgscffxwlJSWY\nNWsWli5diubm5qSGJSIibUT1he/UqVNRVlaG+fPnB6cpioLHHnsMhYWFAG6+QbzyyitYt25dcpJS\n0rUePwZPZQV8zV7AYgFUFTanC9mlc+CYWNT7MQEWCxyTJmPYgkcijtlzrHizBcbJmVUS97YmItpt\nCrmvbqkOMW5grKtH/472M2dCzutrPTanCxnjxuHz06e75QMQ9rntOi8w7fP/nkPrh4cBVe31/Na/\n+/aX83pSFKDnbcFvrU8ZOBCKokBtawtOi5nFggFjx8LX0HhzW9LSgM7ObusMtx/rx92Nzi98vfat\nJqxWwO/vljPca8IolFhu4F5cXIzy8nLk5eX1mrdv3z7s3LkT27dvjymA19sGVRXIyRmMpqZrMS1r\nFP0he+vxY2jYsR2is7PXYxS7HUPLHgWAsI8BAMf9U7od7KHGDIwVyxtAuHG+tvSnUO76ZtTjaCHa\nbYq0PyMKVZ6JrsdqBaAAfl9088JkcNw/BenpdjT8774oN0ZHEfZjKvV8TcSra8dYLApcrkEJj6nJ\nOX9VVbFz504UFxdrMRzpwFNZEbZARGcnPJUVER8D4OanwT7GDIyVaDbR2YlL77wX0zhaiHab+tpX\nYUUorLjX4/eHLv5w88JkaP3wMBr2fRB5XUZhgOIHer8mjEST6/zXrFmDjIwMLFiwIOZlu76D5eQM\n1iKOLsyevbol8vc1vj7mAwBUtdt+CDemr6U5pv0VbpwOjzfl+z3abeprf8YrVesJKZ7TNLLr8ZpI\nhNbHesLl73a7cfHiRZSXl8Niif0PCZ720Vcguy3LGfLcdIAtywkAER8Di6Xbfgg3pi3LGdP+CjdO\nWrYr5fs92m3qa38msv5UrCekwOubbwLR6/GaiJfhTvts3LgRVVVV2LJlC+x2e8JhSD/ZpXOghHkO\nFbsd2aVzIj4GAByTJvc5ZmCsRLMpdjtGL5wfZonkiXab+tpXYSlK+FnxrsdqBaxhPueFmhcmg2PS\nZAwteSDyuowiwn5MpZ6vCSOxPv/888/39aC1a9fiueeeQ2NjI/bt24fKykpMmDABTz31FG677Tbs\n3r0bu3btwpEjR/Dggw/GFODGjU4IAQwcmIbPP4/jHKkB9IfsaSNH4TaXC+0XLkC9cePmpzwhYHO6\nMOTheXBMLOr9mACLBY7J9/f6Yqvn47uOFYtw44yaPjXl+z3abQq7ryKwOV0YMn8BfDc+h8/j6T0v\nivXYnC4MKpwIf+u1L/PNm49B3/xm6Oe2x7xABovDgY5Ll26eO+/y/I6aXIT/q2/6cl5PoUr31vqU\ngQNhSUu7+R3FrWkxs1gw4Ot
fB1Rxc1vS0nr9JRJuPzrG3Q3F6eq1bzVhtXbfnjCviXh17RhFUZCR\nkfiH7Ziu9kkGnvbRF7Prw6zZzZob6D/ZDXHah4iIzInlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIU1+0pmIKFZa3eWN4sPyJ6KU63kHMl+zFw07tgMA3wBShKd9\niCjltLrLG8WP5U9EKRfuBjQpuzENsfyJKPVsTldM00l7LH8iSjmt7vJG8eMXvkSUcoEvdXm1j35Y\n/kSkC8fEIpa9jnjah4hIQix/IiIJsfyJiCTE8icikhDLn4hIQix/IiIJsfyJiCTE8icikhDLn4hI\nQn2Wv9vtRnFxMcaOHYvq6urg9NraWsydOxclJSWYO3cuLly4kMycRESkoT5/3mHq1KkoKyvD/Pnz\nu01fvXo15s2bh4ceegh/+tOf8Jvf/AY7duxIWlC91b/7Nlo/PAyoKmCxwDFpMjK+dic8lRWobvYC\nFgugqkn9jZKudz4KUhTAbgc6OqLK0HOM6l6PSJ1Azvq3/wh88UXMy4fLPiA/H6OfeqbX9EuvutF+\n5syXE9LTgRs3Yl6vFvTc74kwa27AYNnT05H3+pu6RlCEECKaBxYXF6O8vBx5eXnwer0oKSnBiRMn\nYLVa4ff7UVhYiP3798PpdMYUwOttg6oK5OQMRlPTtbg2Itnq330brYcO9p6hKECI3afY7Rha9qim\nbwA973zUl1AZYh3DzHq+AfQqfiK9xfAG0LUfLRYFLteghFcf1zn/K1euYOjQobBarQAAq9WKIUOG\n4MqVKwkHMqLWDw+HnhHmfTMZdyQKdeejSEJliHUMM+tZ9Cx+Mhyd/uoM0P1XPbu+g+XkDNYxSXjV\nqhrzMr6WZk23p7qlOeEM8YxhZt22XcccROHE0hFa92Nc5Z+bm4uGhgb4/f7gaZ/Gxkbk5ubGPJYZ\nTvsEzqXHwpbl1HR7bFnOmG9x1zNDPGOYmWGPJ6Jboj1GDXPax+VyIT8/H3v27AEA7NmzB/n5+TGf\n7zcLx6TJoWcoSujJSbgjUag7H0USKkOsY5jZgPz8iP9NpLv0dF1X3+cXvmvXrsX+/fvh8XiQlZWF\nzMxM7N27FzU1NVi5ciVaW1vhcDjgdrtxxx13xBzAFJ/8EflqH5+Jr/bRU6JX+4Rjhqt9SHIxXu2T\njE/+UV/tkyxmKf9ImF0fzJ56Zs0N9J/sup72ISIic2P5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJ\niOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kRE\nEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhW6ID\nHDx4EJs2bYIQAkIILF26FNOnT9ciGxERJUlC5S+EwNNPP4333nsPeXl5+M9//oMf/ehHmDZtGiwW\n/lEhi9bjx+CprICv2Qub04Xs0jlwTCzSO1Zc+tO2mEl/2u+htgWA4bYv4U/+FosF165dAwBcu3YN\nQ4YMYfFLpPX4MTTs2A7R2QkA8DV70bBjOwDofnDHqj9ti5n0p/0ealvq//gHAALw+4PTjLB9CbW0\noih47bXX8MQTT2DKlCn42c9+BrfbrVU2MgFPZUXwQA8QnZ3wVFbolCh+/WlbzKQ/7fdQ2wK/L1j8\nAUbYvoQ++ft8PmzduhVvvPEGCgoK8K9//QvLly/H3r17MXDgwKjGcLkGBf+dkzM4kTi6kjV7dUtz\nyOm+luaU7BMt15HqbTHrMaN17lTu92Tv83DbEkqs26d19oTK/8yZM2hsbERBQQEAoKCgAOnp6aip\nqcG4ceOiGsPrbYOqCuTkDEZT07VE4uhG5uy2LCd8zd6Q05O9T7Te76ncFrMeM8nInar9nop9H
m5b\nwj022jxds1ssSrcPzfFK6LTPsGHDUF9fj/PnzwMAampq4PV6MXr06ISDkTlkl86BYrd3m6bY7cEv\nucykP22LmfSn/R5qW2C1AVZrt0lG2L6EPvnn5OTg+eefx7Jly6AoCgBg3bp1yMzM1CQcGV/gCyuj\nXckQj/60LWbSn/Z7uG0JNU3v7VOEEELPADztoy9m14dZs5s1N9B/shvitA8REZkTy5+ISEIsfyIi\nCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIJ38wl1aqf/Clw40a3aYHfyvj8\nv+fQ+uFhQFUBiwWOSZPReuhg8jMlfQ3Jw+z6MGt2s+YGdMyuKIAQgMUCqCp/2ycglt/2CVX8QYEd\nTERkcIrdjqFlj0b9BsDf9glX/ACLn4hMwwh38jJX+RMR9RPR3vQlWVj+REQ6sDlduq7fXOWfnh5+\n3q2byRARGZ0R7uRlqvLPe/3NkG8ANqcLwxY/Dsf9U25+ow7cvNrn/ikpTkhE1EPgg+mtbrI5XTF9\n2Zssprrax6iYXR/MnnpmzQ30n+xyXu1DRESaYPkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQSSvhmLh0dHVi3bh3+8Y9/IC0tDePHj8eaNWu0yEZEREmScPlv2LABaWlp2Ldv\nHxRFgcfj0SIXEZHptR4/Bk9lBXzNXsPcwSsgofK/fv06du/ejcOHD0O59eNF2dnZmgQjIjKz1uPH\n0LBjO0RnJ4Cbv9/fsGM7ABjiDSChc/51dXXIzMzE5s2bUVpaioULF+LUqVNaZSMiMi1PZUWw+AOM\ncAevgIQ++fv9ftTV1eGuu+7CM888g08//RQ/+clP8MEHH2DQoOh+da7rr9Pl5AxOJI6umF0fzJ56\nZs0NpDZ7dUtzyOm+lua4cmidPaHyz83Nhc1mw8yZMwEA99xzD7KyslBbW4u77747qjH4k876YnZ9\nmDW7WXMDqc9uy3KGvFWjLcsZcw7D/aSz0+lEYWEhjh49CgCora2F1+vFmDFjEg5GRGRm2aVzoNjt\n3aYZ4Q5eAQlf7fPb3/4Wq1atgtvths1mw8svvwyHw6FFNiIi0wp8qdsvr/YBgFGjRuGdd97RIgsR\nUb/imFhkmLLvif+HLxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUlIs/LfvHkzxo4di+rqaq2GJCKi\nJLFpMchnn32GTz75BCNGjNBiuJi1Hj8GT2UFfM1e2JwuZJfOgWNiUZ/z4h3z0qtutJ85E3xst7c7\nRQFuuw3o7AQsFkBVg8sDCI4ZmKc3M79VM3vqmTU3YNzsef+zXZf1Jlz+nZ2deOGFF/Dqq6+irKxM\ni0wxaT1+DA07tkN0dgIAfM1eNOzYHpwfbl6kN4BIY149+vduxd+LEDeLHwiWu6/Zi/o//gGAAPz+\nbvOISG7Vjz2qyxtAwuW/adMmzJ49GyNHjtQiT8w8lRXBkg4QnZ3wVFYE/x1qXqTyjzSmr9kbX1C/\nL77liIiSIKHy//jjj1FVVYUVK1bEPYbLNSj475ycwTEvX93SHHK6L8z0wLxI64pnTCKieEXTffH0\nYyQJlf/JkydRU1ODqVOnAgDq6+uxePFirF+/Hvfdd19UY3i9bVBVgZycwWhquhZzBluWM+SncVuW\nEwDCzou0rkhjxv3Jn4gojL66r2s/WixKtw/N8Uroap8lS5bgyJEjOHDgAA4cOIBhw4Zh27ZtURe/\nFrJL50Cx27tNU+x2ZJfOiTgv3jEH5OfHF9RqA6zW+JYlI
tKYJlf76Clw7j7SFT2xXu0TaUzHxKJe\nV/t0Y7KrfYhIX3pd7aMIIYQua74l0dM+RsDs+mD21DNrbqD/ZDfEaR8iIjInlj8RkYRY/kREEmL5\nExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY\n/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQh\nlj8RkYRY/kREErIlsnBLSwuefvppXLp0CXa7HWPGjMELL7wAp9OpVT4iIkqChD75K4qCxx57DPv2\n7cP777+PUaNG4ZVXXtEqGxERJUlC5Z+ZmYnCwsLgf48fPx6XL19OOBQRESWXIoQQWgykqioWLVqE\n4uJilJWVaTEkERElSULn/Ltas2YNMjIysGDBgpiW83rboKoCOTmD0dR0Tas4KcXs+mD21DNrbqD/\nZLdYFLhcgxIeU5Pyd7vduHjxIsrLy2Gx8AIiIiKjS7j8N27ciKqqKrz11luw2+1aZCIioiRLqPzP\nnTuHrVu34vbbb8fDDz8MABg5ciS2bNmiSTgiIkqOhMr/zjvvxNmzZ7XKQkREKcIT9EREEmL5ExFJ\niOVPRCQhlj8RkYRY/kREEmL5ExFJiOVPRCQhlj8RkYRY/kREEtLsVz3pptbjx+CprICv2Qub04Xs\n0jlwTCyKebmMcePw+enTIcfp+ljLoEEQQkBcv97ncqHWV93sBSwWQFU13xfJVh1mupKWBtHRAZvT\nBdvQIWg/e9Zw2xcuu9GZNTdgwOwWCxyTJmPYgkd0Wb1mv+cfr/70k86tx4+hYcd2iM7O4DzFbsfQ\nskcjvgGEWq6nwDgA+nxsqOW6rj+a9RFRajjun9LnG0AyftKZp3005Kms6FWoorMTnsqKmJfrKTBO\nNI/ta/2xjkFEydP64WFd1svTPhryNXtjmh7t/Fgf19dy8Y5DREmg0ylJfvLXkM3piml6tPO7Pi7a\nx0YaP54xiChJdLoBFstfQ9mlc6D0uKGNYrcju3ROzMv1FBgnmsf2tf5YxyCi5HFMmqzLennaR0OB\nL1Vjvdon1HJ9XbWTyNU+Pddn1qt9wjHD1T5EvNqnH13tY0bMrg+zZjdrbqD/ZOfVPkREFDeWPxGR\nhFj+REQSYvkTEUlI96t9LBYl5L/Nhtn1weypZ9bcQP/IrtU26H61DxERpR5P+xARSYjlT0QkIZY/\nEZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSSgp5e92u1FcXIyxY8eiuro6OP3gwYP4\n/ve/j4ceegizZ8/G/v37o5pXW1uLuXPnoqSkBHPnzsWFCxeSETti9kOHDuEHP/gBZs2ahQULFqCu\nri6qfEbO3tLSgscffxwlJSWYNWsWli5diubm5uByn3zyCWbPno2SkhIsWrQIXm/y7v0bz34P2Lx5\nc6/ljJ69o6MDq1evxvTp0zFr1iz8+te/Ds4z8jEDGOO1GunYjfTcxztP7+y1tbVYuHAhZsyYgZkz\nZ+LZZ59Fe3t7cMwDBw5gxowZeOCBB7B8+XLcuHGj7yAiCU6ePCkuX74spkyZIs6ePSuEEEJVVXHv\nvfcG//vMmTNi/Pjxwu/3R5wnhBALFy4Uu3fvFkIIsXv3brFw4cJkxA6b/erVq2LChAni/PnzwQyL\nFi0KLhMpn5Gzt7S0iOPHjweXf+mll8Szzz4rhBDC7/eLadOmiZMnTwohhNiyZYtYuXKlYbIHVFVV\nicWLF3dbzgzZ16xZI1588UWhqqoQQoimpqbgPCMfM0Z5rYY7diM99/HOM0L2uro68dlnnwWzLlu2\nTGzevFkIIURbW5soKioStbW1QgghVq1aJV5//fU+cySl/AN6lv+ECRPEqVOnhBBC/POf/xTTp0/v\nc57H4xEFBQXC5/MJI
YTw+XyioKBAeL3eZEbvlv3TTz8V3/ve94LzWlpaRF5envB6vRHzGT17T3/9\n61/FI488ElzuwQcfDM7zer1i/PjxSc0tRGzZOzo6xA9/+ENRV1fXazkjZ29raxMFBQWira2t1xhG\nP2aM+FoV4stjN9JzH+88I2Tvadu2bWLVqlVCCCH+8pe/iCVLlgTnnT59utvzF07KftVTURS89tpr\neOKJJ5CRkYHr16/jrbfe6nPelStXMHToUFitVgCA1WrFkCFDcOXKFTidzpRk/+pXvwqPx4PTp09j\n3LhxeP/994PZhBBh80WaZ4TsXTOoqoqdO3eiuLg4OH/48OHB+U6nE6qq4urVq8jMzDRE9k2bNmH2\n7NkYOXJkt+WMnt1qtSIzMxObN2/GiRMnMHDgQCxbtgz33nuv4Y93p9NpuNdq12M30nMf77xkHjPR\nZu+aob29HRUVFfjlL38JoPfxPnz4cFy5cqXPdafsC1+fz4etW7fijTfewMGDB/Hmm29i+fLluH79\nesR5RjB48GD87ne/w/r161FaWgqv1wuHwxE8yI0s2uxr1qxBRkYGFixYoFPS3iJl//jjj1FVVYV5\n8+bpHTOkSNn9fj/q6upw1113obKyEitWrMCTTz6JtrY2vWMDiJzdiK9VIx670Yo1u8/nwy9+8QtM\nnDgRU6dOTWjdKfvkf+bMGTQ2NqKgoAAAUFBQgPT0dNTU1EBRlLDzRowYgYaGBvj9/uALp7GxEbm5\nuamKDgAoKipCUVERAMDj8WDbtm0YPXo0bty4ETafEMLQ2QPcbjcuXryI8vJyWCw3Pw/k5ubi8uXL\nwcc0NzfDYrGk7JNzX9nfffdd1NTUBF8A9fX1WLx4MdavX2/47O3t7bDZbJg5cyYA4J577kFWVhZq\na2sxfPhwQx8zkV7HerxWex67kZ77eOcZITsA+P1+rFixAl/5ylfw3HPPBR+Xm5uLEydOBP/78uXL\nUe3zlH3yHzZsGOrr63H+/HkAQE1NDbxeL0aPHh1xnsvlQn5+Pvbs2QMA2LNnD/Lz81P2J3BAU1MT\ngJt/pm3cuBEPP/wwMjIyIuYzenYA2LhxI6qqqrBlyxbY7fbgMt/4xjfQ3t6OU6dOAQB27dqFGTNm\npDR3pOxLlizBkSNHcODAARw4cADDhg3Dtm3bcN999xk+u9PpRGFhIY4ePQrg5pUcXq8XY8aMMfwx\nY6TXaqhjN9JzH+88I2RXVRUrV66E1WrFiy++CEX58oYu3/nOd/Dvf/87eGXVrl278N3vfrfPDEm5\nmcvatWuxf/9+eDweZGVlITMzE3v37sWf//xn/P73vw8G//nPf45p06YBQMR5NTU1WLlyJVpbW+Fw\nOOB2u3HHHXdoHTti9l/96lf46KOP8MUXX+Db3/42Vq1ahbS0tD7zGTn7uXPnMHPmTNx+++0YMGAA\nAGDkyJHYsmULAOCjjz7C6tWr0dHRgREjRmDDhg3Izs42RPaeiouLUV5ejry8PFNkr6urw6pVq3D1\n6lXYbDYsX74ckydPBmDsYwYwxms10rEb6bmPd57e2Q8dOoQf//jHyMvLC/51/q1vfQurV68GAPzt\nb3/Dhg0boKoq8vPz8dJLLwU/4IXDO3kREUmI/4cvEZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5E\nRBJi+RMRSYjlT0Qkof8Hm7xNb6groUQAAAAASUVORK5CYII=\n",
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGxtJREFUeJzt3WtwE+ehBuB3JUXGxqi2ZAPmmmYa\nU2caQusMZtwUgiGYNkBaM1NSLk4GEtqmpNCGSQlNSxoIRCWhYQKJSQ8tIcnAH3toAz2FdLikQKHQ\nXKgzFFNjwDPgiyRzjIkvSPrOD5DiiyTrstLu+nufX7Cr/fbd1eqVWC1aRQghQEREUjFpHYCIiFKP\n5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQS\nYvkTEUnIonWAlpYb8PsFHI5MuN1tWseJC7Nrg9lTz6i5gYGT3WRSkJ09OOExNS9/v1/A7xfBPxsV\ns2uD2VPPqLkBZu+Op32IiCTE8icikhDLn4hIQlGVv9PpRElJCcaNG4eamhoAQEtLC5588kmUlpZi\n9uzZWLZsGTweT1LDEhGROqL6wnfatGkoLy/HggULgtMURcETTzyBoqIiALfeIF555RWsX78+OUkp\n6VpPHIerqhJejxswmQC/Hxa7Azllc2GbVNz3MQEmE2yTp2D4wscijtl7rHizBcbJnV0a97YmItpt\nCrmvbqsJMW5grGvH/o6Os2dDzutvPRa7Axnjx+PzM2d65AMQ9rntPi8w7fP/nkfrh0cAv7/P89vw\n7ttfzOtNUYDetwW/vT5TZiaEEBA3bgSnxcxkwqBx4+BtbILX44aSlgbR2RnVfmwYfy+6bnr77FtV\nmM2Az9cjZ7jXhF4osdzAvaSkBBUVFcjPz+8zb//+/di1axd27NgRUwC3uw1+v0Bu7hA0N1+PaVm9\nGAjZW08cR+POHRBdXX0eo1itGFb+OACEfQwA2B6c2uNgDzVmYKxY3gDCjfOVZT+Gcs/Xox5HDdFu\nU6T9GVGo8kx0PWYzAAXweaObFyaD7cGpSE+3ovF/90e5MRqKsB9TqfdrIl7dO8ZkUuBwZCY8pirn\n/P1+P3bt2oWSkhI1hiMNuKoqwxaI6OqCq6oy4mMA3Po02M+YgbESzSa6unD5nfdiGkcN0W5Tf/sq\nrAiFFfd6fL7QxR9uXpgMrR8eQeP+DyKvSy90UPxA39eEnqhynf/atWuRkZGBhQsXxrxs93ew3Nwh\nasTRhNGz17RE/r7G2898AIDf32M/hBvT2+KJaX+FG6fT5U75fo92m/rbn/FK1XpCiuc0jex6vSYS\nofaxnnD5O51OXLp0CRUVFTCZYv+HBE/7aCuQ3ZJtD3luOsCSbQeAiI+BydRjP4Qb05Jtj2l/hRsn\nLceR8v0e7Tb1tz8TWX8q1hNS4PXNN4Ho9XpNxEt3p302bdqE6upqbN26FVarNeEwpJ2csrlQwjyH\nitWKnLK5ER8DALbJU/odMzBWotkUqxVjFi0Is0TyRLtN/e2rsBQl/Kx412M2A+Ywn/NCzQuTwTZ5\nCoaVPhR5XXoRYT+mUu/XhJ6YX3jhhRf6e9C6devw/PPPo6mpCfv370dVVRUmTpyIZ555BnfccQf2\n7NmD3bt34+jRo3j44YdjCtDe3gUhgMGD0/D553GcI9WBgZA9bdRo3OFwoOPiRfjb2299yhMCFrsD\nQx+dD9uk4r6PCTCZYJvyYJ8vtno/vvtYsQg3zugZ01K+36PdprD7KgKL3YGhCxbC2/45vC5X33lR\nrMdidyCzaBJ8rde/yDd/ATK//vXQz22veYEMJpsNnZcv3zp33u35HT2lGP/X0PzFvN5Cle7t9Zky\nM4E77gBu3gxOi5nJhEFf/Sr
gF/C3t0NJS+t5lU2E/Wgbfy8Uu6PPvlWF2dxze8K8JuLVvWMURUFG\nRuIftmO62icZeNpHW8yuDaNmN2puYOBk18VpHyIiMiaWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFT5SWciolipdZc3ig/Ln4hSrvcdyLweNxp37gAAvgGkCE/7\nEFHKqXWXN4ofy5+IUi7cDWhSdmMaYvkTUepZ7I6YppP6WP5ElHJq3eWN4scvfIko5QJf6vJqH+2w\n/IlIE7ZJxSx7DfG0DxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhPotf6fTiZKSEowbNw41NTXB6XV1dZg3bx5KS0sxb948XLx4MZk5iYhIRf3+vMO0adNQXl6OBQsW\n9Ji+Zs0azJ8/H4888gj+9Kc/4de//jV27tyZtKBaa3j3bbR+eATw+wGTCbbJU5DxlbvhqqpEjccN\nmEyA35/U3yjpfuejIEUBrFagszOqDL3HqOnziNQJ5Gx4+4/AzZsxLx8u+6CCAox55hd9pl9+1YmO\ns2e/mJCeDrS3x7xeNWi53xNh1NyAzrKnpyP/9Tc1jaAIIUQ0DywpKUFFRQXy8/PhdrtRWlqKkydP\nwmw2w+fzoaioCAcOHIDdbo8pgNvdBr9fIDd3CJqbr8e1EcnW8O7baD18qO8MRQFC7D7FasWw8sdV\nfQPofeej/oTKEOsYRtb7DaBP8RNpLYY3gO79aDIpcDgyE159XOf8r169imHDhsFsNgMAzGYzhg4d\niqtXryYcSI9aPzwSekaY981k3JEo1J2PIgmVIdYxjKx30bP4SXc0+ldngOa/6tn9HSw3d4iGScKr\n8ftjXsbb4lF1e2paPAlniGcMI+ux7RrmIAonlo5Qux/jKv+8vDw0NjbC5/MFT/s0NTUhLy8v5rGM\ncNoncC49FpZsu6rbY8m2x3yLu94Z4hnDyHR7PBHdFu0xqpvTPg6HAwUFBdi7dy8AYO/evSgoKIj5\nfL9R2CZPCT1DUUJPTsIdiULd+SiSUBliHcPIBhUURPw7kebS0zVdfb9f+K5btw4HDhyAy+VCdnY2\nsrKysG/fPtTW1mLVqlVobW2FzWaD0+nEXXfdFXMAQ3zyR+SrfbwGvtpHS4le7ROOEa72IcnFeLVP\nMj75R321T7IYpfwjYXZtMHvqGTU3MHCya3rah4iIjI3lT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMR\nSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGELIkO\ncOjQIWzevBlCCAghsGzZMsyYMUONbERElCQJlb8QAs8++yzee+895Ofn4z//+Q9+8IMfYPr06TCZ\n+I8KWbSeOA5XVSW8HjcsdgdyyubCNqlY61hxGUjbYiQDab+H2hYAutu+hD/5m0wmXL9+HQBw/fp1\nDB06lMUvkdYTx9G4cwdEVxcAwOtxo3HnDgDQ/OCO1UDaFiMZSPs91LY0/PEPAATg8wWn6WH7Empp\nRVHw2muv4amnnsLUqVPxk5/8BE6nU61sZACuqsrggR4gurrgqqrUKFH8BtK2GMlA2u+htgU+b7D4\nA/SwfQl98vd6vdi2bRveeOMNFBYW4l//+hdWrFiBffv2YfDgwVGN4XBkBv+cmzskkTiakjV7TYsn\n5HRviycl+0TNdaR6W4x6zKidO5X7Pdn7PNy2hBLr9qmdPaHyP3v2LJqamlBYWAgAKCwsRHp6Ompr\nazF+/PioxnC72+D3C+TmDkFz8/VE4mhG5uyWbDu8HnfI6cneJ2rv91Rui1GPmWTkTtV+T8U+D
7ct\n4R4bbZ7u2U0mpceH5ngldNpn+PDhaGhowIULFwAAtbW1cLvdGDNmTMLByBhyyuZCsVp7TFOs1uCX\nXEYykLbFSAbSfg+1LTBbALO5xyQ9bF9Cn/xzc3PxwgsvYPny5VAUBQCwfv16ZGVlqRKO9C/whZXe\nrmSIx0DaFiMZSPs93LaEmqb19ilCCKFlAJ720Raza8Oo2Y2aGxg42XVx2oeIiIyJ5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJKGEb+aSajVP/xhob+8xLfBbGZ//\n9zxaPzwC+P2AyQTb5CloPXwo+ZmSvobkYXZtGDW7UXMDGmZXFEAIwGQC/H7+tk9ALL/tE6r4gwI7\nmIhI5xSrFcPKH4/6DYC/7ROu+AEWPxEZhh7u5GWs8iciGiCivelLsrD8iYg0YLE7NF2/sco/PT38\nvNs3kyEi0js93MnLUOWf//qbId8ALHYHhi95ErYHp976Rh24dbXPg1NTnJCIqJfAB9Pb3WSxO2L6\nsjdZDHW1j14xuzaYPfWMmhsYONnlvNqHiIhUwfInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+I\nSEIsfyIiCbH8iYgklPDNXDo7O7F+/Xr84x//QFpaGiZMmIC1a9eqkY2IiJIk4fLfuHEj0tLSsH//\nfiiKApfLpUYuIiLDaz1xHK6qSng9bt3cwSsgofK/ceMG9uzZgyNHjkC5/eNFOTk5qgQjIjKy1hPH\n0bhzB0RXF4Bbv9/fuHMHAOjiDSChc/719fXIysrCli1bUFZWhkWLFuH06dNqZSMiMixXVWWw+AP0\ncAevgIQ++ft8PtTX1+Oee+7BL37xC3z66af40Y9+hA8++ACZmdH96lz3X6fLzR2SSBxNMbs2mD31\njJobSG32mhZPyOneFk9cOdTOnlD55+XlwWKxYNasWQCA++67D9nZ2airq8O9994b1Rj8SWdtMbs2\njJrdqLmB1Ge3ZNtD3qrRkm2POYfuftLZbrejqKgIx44dAwDU1dXB7XZj7NixCQcjIjKynLK5UKzW\nHtP0cAevgISv9vnNb36D1atXw+l0wmKx4Le//S1sNpsa2YiIDCvwpe6AvNoHAEaPHo133nlHjSxE\nRAOKbVKxbsq+N/4PXyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIi\nCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+I\nSEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpKQauW/ZcsWjBs3DjU1NWoNSURE\nSWJRY5DPPvsMn3zyCUaOHKnGcDFrPXEcrqpKeD1uWOwO5JTNhW1Scb/z4h3z8qtOdJw9G3xsj7c7\nRQGsVqCzEzCZAL8/uDyA4JiBeVoz8ls1s6eeUXMD+s2e/z87NFlvwuXf1dWFF198Ea+++irKy8vV\nyBST1hPH0bhzB0RXFwDA63GjceeO4Pxw8yK9AUQa89qxv/co/j6EuFX8QLDcvR43Gv74BwAC8Pl6\nzCMiudU88bgmbwAJl//mzZsxZ84cjBo1So08MXNVVQZLOkB0dcFVVRn8c6h5kco/0phejzu+oD5v\nfMsRESVBQuX/8ccfo7q6GitXrox7DIcjM/jn3NwhMS9f0+IJOd0bZnpgXqR1xTMmEVG8oum+ePox\nkoTK/9SpU6itrcW0adMAAA0NDViyZAk2bNiABx54IKox3O42+P0CublD0Nx8PeYMlmx7yE/jlmw7\nAISdF2ldkcaM+5M/EVEY/XVf9340mZQeH5rjldDVPkuXLsXRo0dx8OBBHDx4EMOHD8f27dujLn41\n5JTNhWK19pimWK3IKZsbcV68Yw4qKIgvqNkCmM3xLUtEp
DJVrvbRUuDcfaQremK92ifSmLZJxX2u\n9unBYFf7EJG2tLraRxFCCE3WfFuip330gNm1weypZ9TcwMDJrovTPkREZEwsfyIiCbH8iYgkxPIn\nIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8\niYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIs\nfyIiCbH8iYgkZElk4ZaWFjz77LO4fPkyrFYrxo4dixdffBF2u12tfERElAQJffJXFAVPPPEE9u/f\nj/fffx+jR4/GK6+8olY2IiJKkoTKPysrC0VFRcG/T5gwAVeuXEk4FBERJZcihBBqDOT3+7F48WKU\nlJSgvLxcjSGJiChJEjrn393atWuRkZGBhQsXxrSc290Gv18gN3cImpuvqxUnpZhdG8yeekbNDQyc\n7CaTAocjM+ExVSl/p9OJS5cuoaKiAiYTLyAiItK7hMt/06ZNqK6uxltvvQWr1apGJiIiSrKEyv/8\n+fPYtm0b7rzzTjz66KMAgFGjRmHr1q2qhCMiouRIqPzvvvtunDt3Tq0sRESUIjxBT0QkIZY/EZGE\nWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIdV+1ZNuaT1xHK6qSng9bljsDuSU\nzYVtUnHMy2WMH4/Pz5wJOU73x5oyMyGEgLhxo9/lQq2vxuMGTCbA71d9XyRbTZjpSloaRGcnLHYH\nLMOGouPcOd1tX7jsemfU3IAOs5tMsE2eguELH9Nk9ar9nn+8BtJPOreeOI7GnTsgurqC8xSrFcPK\nH4/4BhBqud4C4wDo97Ghluu+/mjWR0SpYXtwar9vAMn4SWee9lGRq6qyT6GKri64qipjXq63wDjR\nPLa/9cc6BhElT+uHRzRZL0/7qMjrccc0Pdr5sT6uv+XiHYeIkkCjU5L85K8ii90R0/Ro53d/XLSP\njTR+PGMQUZJodAMslr+KcsrmQul1QxvFakVO2dyYl+stME40j+1v/bGOQUTJY5s8RZP18rSPigJf\nqsZ6tU+o5fq7aieRq316r8+oV/uEY4SrfYh4tc8AutrHiJhdG0bNbtTcwMDJzqt9iIgobix/IiIJ\nsfyJiCTE8icikpDmV/uYTErIPxsNs2uD2VPPqLmBgZFdrW3Q/GofIiJKPZ72ISKSEMufiEhCLH8i\nIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSUFLK3+l0oqSkBOPGjUNNTU1w+qFDh/Dd\n734XjzzyCObMmYMDBw5ENa+urg7z5s1DaWkp5s2bh4sXLyYjdsTshw8fxve+9z3Mnj0bCxcuRH19\nfVT59Jy9paUFTz75JEpLSzF79mwsW7YMHo8nuNwnn3yCOXPmoLS0FIsXL4bbnbx7/8az3wO2bNnS\nZzm9Z+/s7MSaNWswY8YMzJ49G7/61a+C8/R8zAD6eK1GOnYjPffxztM6e11dHRYtWoSZM2di1qxZ\neO6559DR0REc8+DBg5g5cyYeeughrFixAu3t7f0HEUlw6tQpceXKFTF16lRx7tw5IYQQfr9f3H//\n/cG/nz17VkyYMEH4fL6I84QQYtGiRWLPnj1CCCH27NkjFi1alIzYYbNfu3ZNTJw4UVy4cCGYYfHi\nxcFlIuXTc/aWlhZx4sSJ4PIvv/yyeO6554QQQvh8PjF9+nRx6tQpIYQQW7duFatWrdJN9oDq6mqx\nZMmSHssZIfvatWvFSy+9JPx+vxBCiObm5uA8PR8zenmthjt2Iz338c7TQ/b6+nrx2WefBbMuX75c\nbNmyRQghRFtbmyguLhZ1dXVCCCFWr14tXn/99X5zJKX8A3qX/8SJE8Xp06eFEEL885//FDNmzOh3\nnsvlEoWFhcLr9Qohh
PB6vaKwsFC43e5kRu+R/dNPPxXf+c53gvNaWlpEfn6+cLvdEfPpPXtvf/3r\nX8Vjjz0WXO7hhx8OznO73WLChAlJzS1EbNk7OzvF97//fVFfX99nOT1nb2trE4WFhaKtra3PGHo/\nZvT4WhXii2M30nMf7zw9ZO9t+/btYvXq1UIIIf7yl7+IpUuXBuedOXOmx/MXTsp+1VNRFLz22mt4\n6qmnkJGRgRs3buCtt97qd97Vq1cxbNgwmM1mAIDZbMbQoUNx9epV2O32lGT/8pe/DJfLhTNnzmD8\n+PF4//33g9mEEGHzRZqnh+zdM/j9fuzatQslJSXB+SNGjAjOt9vt8Pv9uHbtGrKysnSRffPmzZgz\nZw5GjRrVYzm9ZzebzcjKysKWLVtw8uRJDB48GMuXL8f999+v++Pdbrfr7rXa/diN9NzHOy+Zx0y0\n2btn6OjoQGVlJX7+858D6Hu8jxgxAlevXu133Sn7wtfr9WLbtm144403cOjQIbz55ptYsWIFbty4\nEXGeHgwZMgS/+93vsGHDBpSVlcHtdsNmswUPcj2LNvvatWuRkZGBhQsXapS0r0jZP/74Y1RXV2P+\n/PlaxwwpUnafz4f6+nrcc889qKqqwsqVK/H000+jra1N69gAImfX42tVj8dutGLN7vV68bOf/QyT\nJk3CtGnTElp3yj75nz17Fk1NTSgsLAQAFBYWIj09HbW1tVAUJey8kSNHorGxET6fL/jCaWpqQl5e\nXqqiAwCKi4tRXFwMAHC5XNi+fTvGjBmD9vb2sPmEELrOHuB0OnHp0iVUVFTAZLr1eSAvLw9XrlwJ\nPsbj8cBkMqXsk3N/2d99913U1tYGXwANDQ1YsmQJNmzYoPvsHR0dsFgsmDVrFgDgvvvuQ3Z2Nurq\n6jBixAhdHzORXsdavFZ7H7uRnvt45+khOwD4fD6sXLkSX/rSl/D8888HH5eXl4eTJ08G/37lypWo\n9nnKPvkPHz4cDQ0NuHDhAgCgtrYWbrcbY8aMiTjP4XCgoKAAe/fuBQDs3bsXBQUFKfsncEBzczOA\nW/9M27RpEx599FFkZGREzKf37ACwadMmVFdXY+vWrbBarcFlvva1r6GjowOnT58GAOzevRszZ85M\nae5I2ZcuXYqjR4/i4MGDOHjwIIYPH47t27fjgQce0H12u92OoqIiHDt2DMCtKzncbjfGjh2r+2NG\nT6/VUMdupOc+3nl6yO73+7Fq1SqYzWa89NJLUJQvbujyrW99C//+97+DV1bt3r0b3/72t/vNkJSb\nuaxbtw4HDhyAy+VCdnY2srKysG/fPvz5z3/G73//+2Dwn/70p5g+fToARJxXW1uLVatWobW1FTab\nDU6nE3fddZfasSNm/+Uvf4mPPvoIN2/exDe/+U2sXr0aaWlp/ebTc/bz589j1qxZuPPOOzFo0CAA\nwKhRo7B161YAwEcffYQ1a9ags7MTI0eOxMaNG5GTk6OL7L2VlJSgoqIC+fn5hsheX1+P1atX49q1\na7BYLFixYgWmTJkCQN/HDKCP12qkYzfScx/vPK2zHz58GD/84Q+Rn58f/Nf5N77xDaxZswYA8Le/\n/Q0bN26E3+9HQUEBXn755eAHvHB4Jy8iIgnxf/gSEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQS+n9YnE5sVgm99QAAAABJRU5ErkJggg==\n",
             "text/plain": [
               "<Figure size 432x288 with 1 Axes>"
             ]
@@ -2308,7 +2002,10 @@
         "colab_type": "text"
       },
       "source": [
-        "# -----current: test ready-----"
+        "# -----current: test ready-----\n",
+        "- Converting to pandas for now\n",
+        "  - to inspect what each step actually produces\n",
+        "    - and to work out which steps can and cannot be replicated in cuML"
       ]
     },
     {
@@ -2319,7 +2016,9 @@
         "colab": {}
       },
       "source": [
-        "from cuml.preprocessing.model_selection import train_test_split\n",
+        "from sklearn import neighbors\n",
+        "# from cuml.preprocessing.model_selection import train_test_split\n",
+        "from sklearn.model_selection import StratifiedKFold,GridSearchCV,train_test_split\n",
         "#location seems to be related to building quality, (knnclassifier)\n",
         "\n",
         "def fillna_knn(df, base, target):\n",
@@ -2335,8 +2034,10 @@
         "    #print(not_missing_rows.head())\n",
         "    Y = not_missing_rows[target]\n",
         "    X = not_missing_rows[base]\n",
-        "    #X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192,stratify=Y)\n",
-        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8)\n",
+        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, \n",
+        "                                                        test_size=0.20,\n",
+        "                                                        random_state=3192,\n",
+        "                                                        stratify=Y)\n",
         "    metrics       = ['euclidean'] \n",
         "    weights       = ['distance'] \n",
         "    numNeighbors  = [5,10,15,20,25]\n",
@@ -2363,21 +2064,88 @@
       "metadata": {
         "id": "6eES-hq--NKZ",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "2bc86856-507d-47bf-cfab-d29649cba819",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 903
+        }
       },
       "source": [
+        "# restore df_train from the safe copy `test` (created once via the commented-out line below)\n",
         "# test = df_train.copy()\n",
-        "df_train = test.copy()"
+        "df_train = test.copy()\n",
+        "# switch to pandas (figuring out what's going on)\n",
+        "df_train = df_train.to_pandas()\n",
+        "\n",
+        "print(df_train.info())"
       ],
-      "execution_count": 0,
-      "outputs": []
+      "execution_count": 191,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "<class 'pandas.core.frame.DataFrame'>\n",
+            "RangeIndex: 90275 entries, 0 to 90274\n",
+            "Data columns (total 45 columns):\n",
+            "parcelid                                 90275 non-null int64\n",
+            "logerror                                 90275 non-null float64\n",
+            "ac_id                                    28781 non-null float64\n",
+            "basement_sqft                            90275 non-null float64\n",
+            "total_bath                               89110 non-null float64\n",
+            "bedroomcnt                               88854 non-null float64\n",
+            "buildingqualitytypeid                    57364 non-null float64\n",
+            "deck_flag                                90275 non-null float64\n",
+            "finished_living_area_entryfloor_sqft2    6856 non-null float64\n",
+            "total_finished_living_area_sqft          89614 non-null float64\n",
+            "finished_living_area_entryfloor_sqft1    6856 non-null float64\n",
+            "fips                                     90275 non-null float64\n",
+            "fireplace_count                          90275 non-null float64\n",
+            "full_bath                                89093 non-null float64\n",
+            "garagecarcnt                             29937 non-null float64\n",
+            "garage_sqft                              21017 non-null float64\n",
+            "has_hottub_or_spa                        90275 non-null int64\n",
+            "heating_system_id                        56080 non-null float64\n",
+            "latitude                                 90275 non-null float64\n",
+            "longitude                                90275 non-null float64\n",
+            "lot_area_sqft                            80125 non-null float64\n",
+            "pool_count                               90275 non-null float64\n",
+            "pool_sqft                                90275 non-null float64\n",
+            "just_hottub_or_spa                       90275 non-null float64\n",
+            "pool_with_spa_tub_yes                    90275 non-null float64\n",
+            "pool_with_spa_tub_no                     90275 non-null float64\n",
+            "propertylandusetypeid                    90275 non-null float64\n",
+            "roomcnt                                  88859 non-null float64\n",
+            "basement_flag                            90275 non-null float64\n",
+            "half_bath                                89093 non-null float64\n",
+            "unitcnt                                  90275 non-null float64\n",
+            "patio_sqft                               90275 non-null float64\n",
+            "storage_sqft                             90275 non-null float64\n",
+            "yearbuilt                                89519 non-null float64\n",
+            "numberofstories                          20581 non-null float64\n",
+            "fireplaceflag                            90275 non-null bool\n",
+            "structure_tax                            89895 non-null float64\n",
+            "total_parcel_tax                         90274 non-null float64\n",
+            "land_tax                                 90274 non-null float64\n",
+            "total_property_tax_2016                  90269 non-null float64\n",
+            "taxdelinquencyflag                       90275 non-null int64\n",
+            "taxdelinquencyyear                       90275 non-null float64\n",
+            "transaction_month                        90275 non-null int16\n",
+            "census_tractnumber                       90275 non-null object\n",
+            "block_number                             90275 non-null object\n",
+            "dtypes: bool(1), float64(38), int16(1), int64(3), object(2)\n",
+            "memory usage: 29.9+ MB\n",
+            "None\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "AT8Osn51lD9v",
         "colab_type": "code",
-        "outputId": "83435ba5-0887-47fb-f8fb-ceeb9dd92fda",
+        "outputId": "8ab0690a-2e06-468e-b7ce-f4d051a3ce83",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 573
@@ -2390,7 +2158,7 @@
         "print(f'BUILDINGTYPEID HEAD\\n{df_train.buildingqualitytypeid.head()}\\n')\n",
         "print(f'DF TRAIN HEAD\\n{df_train.head()}')"
       ],
-      "execution_count": 49,
+      "execution_count": 192,
       "outputs": [
         {
           "output_type": "stream",
@@ -2408,23 +2176,23 @@
             "8.0         5\n",
             "6.0         2\n",
             "11.0        1\n",
-            "Name: buildingqualitytypeid, dtype: int32\n",
+            "Name: buildingqualitytypeid, dtype: int64\n",
             "\n",
             "BUILDINGTYPEID HEAD\n",
-            "0     7.0\n",
-            "1    null\n",
-            "2    null\n",
-            "3     7.0\n",
-            "4     4.0\n",
+            "0    7.0\n",
+            "1    NaN\n",
+            "2    NaN\n",
+            "3    7.0\n",
+            "4    4.0\n",
             "Name: buildingqualitytypeid, dtype: float64\n",
             "\n",
             "DF TRAIN HEAD\n",
-            "   parcelid  logerror ac_id  ...  transaction_month  census_tractnumber  block_number\n",
-            "0  11827818    0.0402  null  ...                  3             5315.03          1013\n",
-            "1  12123024    0.0296  null  ...                  3             4625.00          1017\n",
-            "2  13867327    0.0344  null  ...                  3             0114.01          2017\n",
-            "3  12681894    0.0060  null  ...                  3             6513.02          1004\n",
-            "4  12848541    0.0695   1.0  ...                  3             4087.03          1018\n",
+            "   parcelid  logerror  ac_id  ...  transaction_month  census_tractnumber  block_number\n",
+            "0  11827818    0.0402    NaN  ...                  3             5315.03          1013\n",
+            "1  12123024    0.0296    NaN  ...                  3             4625.00          1017\n",
+            "2  13867327    0.0344    NaN  ...                  3             0114.01          2017\n",
+            "3  12681894    0.0060    NaN  ...                  3             6513.02          1004\n",
+            "4  12848541    0.0695    1.0  ...                  3             4087.03          1018\n",
             "\n",
             "[5 rows x 45 columns]\n"
           ],
@@ -2437,18 +2205,17 @@
       "metadata": {
         "id": "79bB7JKdAEtX",
         "colab_type": "code",
-        "outputId": "b1b1e940-e89a-40e8-c5af-5919c896ca19",
+        "outputId": "32b79160-fd19-4d39-988a-fc5fcd7c3284",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 225
         }
       },
       "source": [
-        "temp=df_train.copy()\n",
-        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].fillna(-1)\n",
-        "print(f'NULL COUNT = {temp.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{temp.buildingqualitytypeid.value_counts()}')"
+        "df_train['buildingqualitytypeid'] = df_train['buildingqualitytypeid'].fillna(-1)\n",
+        "print(f'NULL COUNT = {df_train.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{df_train.buildingqualitytypeid.value_counts()}')"
       ],
-      "execution_count": 50,
+      "execution_count": 193,
       "outputs": [
         {
           "output_type": "stream",
@@ -2464,42 +2231,75 @@
             " 8.0         5\n",
             " 6.0         2\n",
             " 11.0        1\n",
-            "Name: buildingqualitytypeid, dtype: int32\n"
+            "Name: buildingqualitytypeid, dtype: int64\n"
           ],
           "name": "stdout"
         }
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DVgF1c_p_bN1",
+        "colab_type": "text"
+      },
+      "source": [
+        "# -----current: break-----\n",
+        "- break 1 of 2"
+      ]
+    },
     {
       "cell_type": "code",
       "metadata": {
         "id": "mAB9bsrPAGzQ",
         "colab_type": "code",
-        "outputId": "ff5376d3-6854-4d05-a7c1-7ffe0a6136a4",
+        "outputId": "d847758e-212e-4de8-85c4-89b469b71c48",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 347
+          "height": 762
         }
       },
       "source": [
         "# say we run this whole thing by buildingqualitytypeid\n",
-        "# temp=temp.groupby(\"buildingqualitytypeid\")\n",
         "# drop building types that aren't seen at least 3 times in the data\n",
-        "# .filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
-        "# conditions = (temp.buildingqualitytypeid.value_counts > 3)\n",
-        "# print(temp.loc[temp.buildingqualitytypeid.astype(int) > 3].head())\n",
-        "# temp.loc[temp.census_tractnumber.value_counts() > 3]\n",
-        "# print(temp.loc[temp.census_tractnumber.value_counts().values > 3].to_pandas().head())\n",
+        "# df_train = df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
         "\n",
-        "\"\"\"still working on how to best do this in RAPIDS\n",
-        "\"\"\"\n",
-        "print(f'{temp.buildingqualitytypeid.value_counts()}\\n')\n",
-        "temp = temp.to_pandas()\n",
-        "temp = temp.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
-        "temp = cudf.from_pandas(temp)\n",
-        "print(temp.buildingqualitytypeid.value_counts())"
+        "# BACK TO cuDF\n",
+        "df_train = cudf.from_pandas(df_train)\n",
+        "\n",
+        "print(df_train.buildingqualitytypeid.value_counts())\n",
+        "print()\n",
+        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "print()\n",
+        "\n",
+        "# Count each quality id once with value_counts rather than slicing the frame\n",
+        "# per id: the per-id `.loc[mask]` slice raised ValueError on cuDF, and the\n",
+        "# sleep(5) per id only added idle time.\n",
+        "MIN_TYPE_COUNT = 3  # ids seen this many times or fewer are dropped\n",
+        "type_counts = df_train.buildingqualitytypeid.value_counts().to_pandas()\n",
+        "type_ids = type_counts.index.tolist()\n",
+        "safe = type_counts[type_counts > MIN_TYPE_COUNT].index.tolist()\n",
+        "rare = type_counts.drop(safe)  # ids that did not clear the threshold\n",
+        "for tid, n_rows in rare.items():\n",
+        "  print(f'{tid} count too low @ {n_rows}')\n",
+        "\n",
+        "for tid in rare.index.tolist():\n",
+        "  # plain boolean-mask indexing instead of `.loc[mask]`\n",
+        "  # NOTE(review): confirm mask indexing works on the installed cuDF version\n",
+        "  df_train = df_train[df_train.buildingqualitytypeid != tid]\n",
+        "print()\n",
+        "print(df_train.buildingqualitytypeid.value_counts())\n",
+        "print()\n",
+        "\n",
+        "df_train['buildingqualitytypeid'] = df_train['buildingqualitytypeid'].replace(-1,np.nan)\n",
+        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
+        "print(df_train.shape)\n",
+        "\n",
+        "# BACK TO PANDAS\n",
+        "df_train = df_train.to_pandas()"
       ],
-      "execution_count": 51,
+      "execution_count": 194,
       "outputs": [
         {
           "output_type": "stream",
@@ -2515,56 +2315,46 @@
             " 11.0        1\n",
             "Name: buildingqualitytypeid, dtype: int32\n",
             "\n",
-            "-1.0     32911\n",
-            " 7.0     29310\n",
-            " 4.0     23839\n",
-            " 1.0      2627\n",
-            " 10.0     1461\n",
-            " 12.0      119\n",
-            " 8.0         5\n",
-            "Name: buildingqualitytypeid, dtype: int32\n"
+            "0\n",
+            "(90275, 45)\n",
+            "\n",
+            "1.0\n",
+            "4.0\n",
+            "6.0\n",
+            "6.0 count too low @ 2\n",
+            "7.0\n",
+            "8.0\n",
+            "10.0\n",
+            "11.0\n"
           ],
           "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "uCyRxp-7qEXf",
-        "colab_type": "code",
-        "outputId": "629f0745-3a63-4bd8-aa10-835a94450cb6",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 52
-        }
-      },
-      "source": [
-        "temp['buildingqualitytypeid'] = temp['buildingqualitytypeid'].replace(-1,np.nan)\n",
-        "print(temp.buildingqualitytypeid.isnull().sum())\n",
-        "print(temp.shape)"
-      ],
-      "execution_count": 52,
-      "outputs": [
+        },
         {
-          "output_type": "stream",
-          "text": [
-            "32911\n",
-            "(90272, 45)\n"
-          ],
-          "name": "stdout"
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-194-5024fb6909aa>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     13\u001b[0m   \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m   \u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m   \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mtid\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0msafe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    107\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    108\u001b[0m                 \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 109\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple_arg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    110\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    111\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple_arg\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    218\u001b[0m                 \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m                 \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    221\u001b[0m         \u001b[0;31m# Step 4: Downcast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    222\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_downcast_to_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/dataframe.py\u001b[0m in \u001b[0;36mindex\u001b[0;34m(self, _index)\u001b[0m\n\u001b[1;32m   1058\u001b[0m                 \u001b[0;34m\"have %d elements\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mold_length\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1059\u001b[0m             )\n\u001b[0;32m-> 1060\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1062\u001b[0m         \u001b[0;31m# try to build an index from generic _index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 1 elements, new values have 90275 elements"
+          ]
         }
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "DVgF1c_p_bN1",
+        "id": "Zl7eXGt_g1uU",
         "colab_type": "text"
       },
       "source": [
         "# -----current: break-----\n",
-        "- below is last cell run"
+        "- break 2 of 2\n",
+        "  - below is last cell run"
       ]
     },
     {
@@ -2572,42 +2362,76 @@
       "metadata": {
         "id": "Q3ZBSOHm-79A",
         "colab_type": "code",
-        "outputId": "3da3e840-8d13-426a-e0aa-8ae20679326b",
+        "outputId": "e9ddb9b3-0bb0-4cf7-fa8e-ca35b9ea7f46",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 394
+          "height": 557
         }
       },
       "source": [
-        "missing_values = fillna_knn(temp, \n",
+        "# run cell above (currently broken) as would be in pandas\n",
+        "not_df_train = df_train.to_pandas()\n",
+        "not_df_train = not_df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+        "\n",
+        "missing_values = fillna_knn(not_df_train, \n",
         "                            base = ['latitude', 'longitude'], \n",
         "                            target = 'buildingqualitytypeid')\n",
         "\n",
         "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['buildingqualitytypeid'].isnull()\n",
-        "df_train.loc[missing_values_boolflag, 'buildingqualitytypeid'] = missing_values\n",
+        "missing_values_boolflag = not_df_train['buildingqualitytypeid'].isnull()\n",
+        "not_df_train.loc[missing_values_boolflag, 'buildingqualitytypeid'] = missing_values\n",
         "\n",
-        "print(df_train.buildingqualitytypeid.isnull().sum())"
+        "print(not_df_train.buildingqualitytypeid.isnull().sum())"
       ],
-      "execution_count": 53,
+      "execution_count": 195,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
-            "# of miss 32911\n"
+            "# of miss 0\n",
+            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
           ],
           "name": "stdout"
         },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
+            "                     metric_params=None, n_jobs=-1, n_neighbors=15, p=2,\n",
+            "                     weights='distance')\n",
+            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 15, 'weights': 'distance'}\n",
+            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    7.1s finished\n"
+          ],
+          "name": "stderr"
+        },
         {
           "output_type": "error",
-          "ename": "NameError",
+          "ename": "ValueError",
           "evalue": "ignored",
           "traceback": [
             "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-53-d133e1117381>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m missing_values = fillna_knn(temp, \n\u001b[1;32m      2\u001b[0m                             \u001b[0mbase\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m                             target = 'buildingqualitytypeid')\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m<ipython-input-43-54081f35e0d8>\u001b[0m in \u001b[0;36mfillna_knn\u001b[0;34m(df, base, target)\u001b[0m\n\u001b[1;32m     21\u001b[0m     \u001b[0mnumNeighbors\u001b[0m  \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m15\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     22\u001b[0m     \u001b[0mparam_grid\u001b[0m    \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn_neighbors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnumNeighbors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m     \u001b[0mcv\u001b[0m            \u001b[0;34m=\u001b[0m \u001b[0mStratifiedKFold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_splits\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3192\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     24\u001b[0m     \u001b[0mgrid\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mGridSearchCV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mneighbors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mKNeighborsClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparam_grid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mscoring\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'f1_weighted'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrefit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mreturn_train_score\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mpre_dispatch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'n_jobs'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m     \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mY_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mNameError\u001b[0m: name 'StratifiedKFold' is not defined"
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-195-5b5613488983>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m missing_values = fillna_knn(not_df_train, \n\u001b[1;32m      5\u001b[0m                             \u001b[0mbase\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m                             target = 'buildingqualitytypeid')\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m<ipython-input-189-96906960b52d>\u001b[0m in \u001b[0;36mfillna_knn\u001b[0;34m(df, base, target)\u001b[0m\n\u001b[1;32m     35\u001b[0m     \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m     \u001b[0mZ\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_values_boolflag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     38\u001b[0m     \u001b[0;31m#df.loc[ missing_values_boolflag, target ]  = Z\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     39\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/utils/metaestimators.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    115\u001b[0m         \u001b[0;31m# lambda, but not partial, allows help() to work with update_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m         \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    117\u001b[0m         \u001b[0;31m# update the docstring of the returned function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m         \u001b[0mupdate_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    455\u001b[0m         \"\"\"\n\u001b[1;32m    456\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'predict'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    459\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mif_delegate_has_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelegate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'best_estimator_'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'estimator'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/neighbors/classification.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    145\u001b[0m             \u001b[0mClass\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mdata\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    146\u001b[0m         \"\"\"\n\u001b[0;32m--> 147\u001b[0;31m         \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csr'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    149\u001b[0m         \u001b[0mneigh_dist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mneigh_ind\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkneighbors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    548\u001b[0m                              \u001b[0;34m\" minimum of %d is required%s.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m                              % (n_samples, array.shape, ensure_min_samples,\n\u001b[0;32m--> 550\u001b[0;31m                                 context))\n\u001b[0m\u001b[1;32m    551\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    552\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mensure_min_features\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mValueError\u001b[0m: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required."
           ]
         }
       ]
@@ -2619,7 +2443,8 @@
         "colab_type": "text"
       },
       "source": [
-        "# BELOW NOT RUN"
+        "# BELOW NOT (really) RUN\n",
+        "- if run, was in pandas"
       ]
     },
     {
@@ -2627,7 +2452,11 @@
       "metadata": {
         "id": "oTh_XPErqkHf",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3e667bca-70c5-4b66-c7d2-12d171cb140b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 278
+        }
       },
       "source": [
         "print(df_train.heating_system_id.isnull().sum())\n",
@@ -2651,14 +2480,52 @@
         "print(df_train.heating_system_id.isnull().sum())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "34194\n",
+            "(90272, 45)\n",
+            "34194\n",
+            "(90266, 45)\n",
+            "# of miss 34194\n",
+            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
+            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    3.3s finished\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
+            "                     metric_params=None, n_jobs=-1, n_neighbors=15, p=2,\n",
+            "                     weights='distance')\n",
+            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 15, 'weights': 'distance'}\n",
+            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
+            "predicted output shape (34194,)\n",
+            "0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "oVjNSkUYqnCt",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "80fc7e87-36cd-44b7-96e9-ef0631c7d10c",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 278
+        }
       },
       "source": [
         "print(df_train.ac_id.isnull().sum())\n",
@@ -2681,14 +2548,52 @@
         "print(df_train.ac_id.isnull().sum())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "61492\n",
+            "(90272, 45)\n",
+            "61492\n",
+            "(90270, 45)\n",
+            "# of miss 61492\n",
+            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
+            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    2.0s finished\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
+            "                     metric_params=None, n_jobs=-1, n_neighbors=25, p=2,\n",
+            "                     weights='distance')\n",
+            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 25, 'weights': 'distance'}\n",
+            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
+            "predicted output shape (61492,)\n",
+            "0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
       "metadata": {
         "id": "qTbcYbexqr0Y",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3459affa-a41a-4241-ab62-f0dfcadda039",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 278
+        }
       },
       "source": [
         "#yearbuilt\n",
@@ -2711,7 +2616,41 @@
         "print(df_train.yearbuilt.isnull().sum())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "755\n",
+            "(90272, 45)\n",
+            "755\n",
+            "(90258, 45)\n",
+            "# of miss 755\n",
+            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
+            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:   44.3s finished\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
+            "                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,\n",
+            "                     weights='distance')\n",
+            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}\n",
+            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
+            "predicted output shape (755,)\n",
+            "0\n"
+          ],
+          "name": "stdout"
+        }
+      ]
     },
     {
       "cell_type": "code",
@@ -2763,7 +2702,11 @@
       "metadata": {
         "id": "pj5PXm7ozg5l",
         "colab_type": "code",
-        "colab": {}
+        "outputId": "3d42279f-221c-444c-8795-05a0832f97cd",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 606
+        }
       },
       "source": [
         "#garage_sqft\n",
@@ -2780,11 +2723,57 @@
         "\n",
         "print(\"predicted output shape\",missing_values.shape)\n",
         "missing_values_boolflag = df_train['garage_sqft'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'garage_sqft' ] = missing_values\n",
+        "df_train.loc[missing_values_boolflag, 'garage_sqft'] = missing_values\n",
         "print(df_train.garage_sqft.isnull().sum())"
       ],
       "execution_count": 0,
-      "outputs": []
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "69255\n",
+            "(90272, 45)\n",
+            "8920\n",
+            "(29647, 45)\n",
+            "# of miss 8920\n",
+            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
+            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    2.7s finished\n"
+          ],
+          "name": "stderr"
+        },
+        {
+          "output_type": "stream",
+          "text": [
+            "grid.best_estimator_ KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='euclidean',\n",
+            "                    metric_params=None, n_jobs=-1, n_neighbors=5, p=2,\n",
+            "                    weights='distance')\n",
+            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}\n",
+            "grid.scorer_ make_scorer(mean_absolute_error, greater_is_better=False)\n",
+            "predicted output shape (8920,)\n"
+          ],
+          "name": "stdout"
+        },
+        {
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-55-bed8646c0f85>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0mmissing_values_boolflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'garage_sqft'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_values_boolflag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'garage_sqft'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmissing_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     15\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgarage_sqft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m    188\u001b[0m             \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_setitem_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 190\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    191\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    192\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[0;34m(self, indexer, value)\u001b[0m\n\u001b[1;32m    609\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    610\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m                         raise ValueError('Must have equal len keys and value '\n\u001b[0m\u001b[1;32m    612\u001b[0m                                          'when setting with an iterable')\n\u001b[1;32m    613\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mValueError\u001b[0m: Must have equal len keys and value when setting with an iterable"
+          ]
+        }
+      ]
     },
     {
       "cell_type": "code",

From af425f5bc700c2d4c756d070f059f635393d7262 Mon Sep 17 00:00:00 2001
From: Winston <43570913+gumdropsteve@users.noreply.github.com>
Date: Thu, 3 Oct 2019 23:59:41 -0700
Subject: [PATCH 5/7] Create linear_regression_boston_demo.ipynb

requested change: needs non colab version in intermediate > examples
---
 .../linear_regression_boston_demo.ipynb       | 768 ++++++++++++++++++
 1 file changed, 768 insertions(+)
 create mode 100644 intermediate_notebooks/examples/linear_regression_boston_demo.ipynb

diff --git a/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb b/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb
new file mode 100644
index 00000000..53b868d2
--- /dev/null
+++ b/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb
@@ -0,0 +1,768 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "LOCAL_intro_lin_reg_cuml",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2tZ3RLnlkrkg",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Intro to Linear Regression with cuML\n",
+        "Corresponding notebook to [*Beginner’s Guide to Linear Regression in Python with cuML*](https://medium.com/future-vision/beginners-guide-to-linear-regression-in-python-with-cuml-30e2709c761) story on Medium\n",
+        "\n",
+        "Linear Regression is a simple machine learning model where the response `y` is modelled by a linear combination of the predictors in `X`. The `LinearRegression` function implemented in the `cuML` library allows users to change the `fit_intercept`, `normalize`, and `algorithm` parameters. \n",
+        "\n",
+        "Here is a brief on RAPIDS' Linear Regression parameters:\n",
+        "\n",
+        "- `algorithm`: 'eig' or 'svd' (default = 'eig')\n",
+        "    - `Eig` uses an eigendecomposition of the covariance matrix, and is much faster\n",
+        "    - `SVD` is slower, but guaranteed to be stable\n",
+        "- `fit_intercept`: boolean (default = True)\n",
+        "  - If `True`, `LinearRegression` tries to correct for the global mean of `y`\n",
+        "  - If `False`, the model expects that you have centered the data.\n",
+        "- `normalize`: boolean (default = False)\n",
+        "  - If True, the predictors in X will be normalized by dividing by their L2 norm\n",
+        "  - If False, no scaling will be done\n",
+        "\n",
+        "Methods that can be used with `LinearRegression` are:\n",
+        "\n",
+        "- `fit`: Fit the model with `X` and `y`\n",
+        "- `get_params`: Sklearn style return parameter state\n",
+        "- `predict`: Predicts the `y` for `X`\n",
+        "- `set_params`: Sklearn style set parameter state to dictionary of params\n",
+        "\n",
+        "`cuML`'s `LinearRegression` expects either `cuDF` DataFrame or `NumPy` matrix inputs\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-tG6ezqKh1Z0",
+        "colab_type": "text"
+      },
+      "source": [
+        "Note: `CuPy` is not installed by default with RAPIDS `Conda` or `Docker` packages, but is needed for visualizing results in this notebook.\n",
+        "- install with `pip` via the cell below "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pxBcXor_0-Jd",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# install cupy\n",
+        "!pip install cupy"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "N20le3_KlP3O",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Load data\n",
+        "- for this demo, we will be utilizing the Boston housing dataset from `sklearn`\n",
+        "  - start by loading in the set and printing a map of the contents"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "RFE-nxxlTajg",
+        "colab_type": "code",
+        "outputId": "04f89e88-61a3-4dd2-9088-123b410e508c",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "from sklearn.datasets import load_boston\n",
+        "\n",
+        "# load Boston dataset\n",
+        "boston = load_boston()\n",
+        "\n",
+        "# let's see what's inside\n",
+        "print(boston.keys())"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wmcO8dxO0uOB",
+        "colab_type": "text"
+      },
+      "source": [
+        "#### Boston house prices dataset\n",
+        "- a description of the dataset is provided in `DESCR`\n",
+        "  - let's explore "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "c3kLHAsP-Al2",
+        "colab_type": "code",
+        "outputId": "02518c3c-7767-42a7-b6f4-6756ace741cc",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 923
+        }
+      },
+      "source": [
+        "# what do we know about this dataset?\n",
+        "print(boston.DESCR)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            ".. _boston_dataset:\n",
+            "\n",
+            "Boston house prices dataset\n",
+            "---------------------------\n",
+            "\n",
+            "**Data Set Characteristics:**  \n",
+            "\n",
+            "    :Number of Instances: 506 \n",
+            "\n",
+            "    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
+            "\n",
+            "    :Attribute Information (in order):\n",
+            "        - CRIM     per capita crime rate by town\n",
+            "        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\n",
+            "        - INDUS    proportion of non-retail business acres per town\n",
+            "        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n",
+            "        - NOX      nitric oxides concentration (parts per 10 million)\n",
+            "        - RM       average number of rooms per dwelling\n",
+            "        - AGE      proportion of owner-occupied units built prior to 1940\n",
+            "        - DIS      weighted distances to five Boston employment centres\n",
+            "        - RAD      index of accessibility to radial highways\n",
+            "        - TAX      full-value property-tax rate per $10,000\n",
+            "        - PTRATIO  pupil-teacher ratio by town\n",
+            "        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\n",
+            "        - LSTAT    % lower status of the population\n",
+            "        - MEDV     Median value of owner-occupied homes in $1000's\n",
+            "\n",
+            "    :Missing Attribute Values: None\n",
+            "\n",
+            "    :Creator: Harrison, D. and Rubinfeld, D.L.\n",
+            "\n",
+            "This is a copy of UCI ML housing dataset.\n",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
+            "\n",
+            "\n",
+            "This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
+            "\n",
+            "The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\n",
+            "prices and the demand for clean air', J. Environ. Economics & Management,\n",
+            "vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics\n",
+            "...', Wiley, 1980.   N.B. Various transformations are used in the table on\n",
+            "pages 244-261 of the latter.\n",
+            "\n",
+            "The Boston house-price data has been used in many machine learning papers that address regression\n",
+            "problems.   \n",
+            "     \n",
+            ".. topic:: References\n",
+            "\n",
+            "   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
+            "   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
+            "\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wI_sB78vE297",
+        "colab_type": "text"
+      },
+      "source": [
+        "### Build Dataframe\n",
+        "- Import `cuDF` and input the data into a DataFrame \n",
+        "  - Then add a `PRICE` column equal to the `target` key"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xiMmIZ8O5scJ",
+        "colab_type": "code",
+        "outputId": "fd09db1f-fb41-4494-bb8b-eab6e18c258f",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        }
+      },
+      "source": [
+        "import cudf\n",
+        "\n",
+        "# build dataframe from data key\n",
+        "bos = cudf.DataFrame(list(boston.data))\n",
+        "# set column names to feature_names\n",
+        "bos.columns = boston.feature_names\n",
+        "\n",
+        "# add PRICE column from target\n",
+        "bos['PRICE'] = boston.target\n",
+        "\n",
+        "# let's see what we're working with\n",
+        "bos.head()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>CRIM</th>\n",
+              "      <th>ZN</th>\n",
+              "      <th>INDUS</th>\n",
+              "      <th>CHAS</th>\n",
+              "      <th>NOX</th>\n",
+              "      <th>RM</th>\n",
+              "      <th>AGE</th>\n",
+              "      <th>DIS</th>\n",
+              "      <th>RAD</th>\n",
+              "      <th>TAX</th>\n",
+              "      <th>PTRATIO</th>\n",
+              "      <th>B</th>\n",
+              "      <th>LSTAT</th>\n",
+              "      <th>PRICE</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>0.00632</td>\n",
+              "      <td>18.0</td>\n",
+              "      <td>2.31</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.538</td>\n",
+              "      <td>6.575</td>\n",
+              "      <td>65.2</td>\n",
+              "      <td>4.0900</td>\n",
+              "      <td>1.0</td>\n",
+              "      <td>296.0</td>\n",
+              "      <td>15.3</td>\n",
+              "      <td>396.90</td>\n",
+              "      <td>4.98</td>\n",
+              "      <td>24.0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>0.02731</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>7.07</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.469</td>\n",
+              "      <td>6.421</td>\n",
+              "      <td>78.9</td>\n",
+              "      <td>4.9671</td>\n",
+              "      <td>2.0</td>\n",
+              "      <td>242.0</td>\n",
+              "      <td>17.8</td>\n",
+              "      <td>396.90</td>\n",
+              "      <td>9.14</td>\n",
+              "      <td>21.6</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>0.02729</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>7.07</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.469</td>\n",
+              "      <td>7.185</td>\n",
+              "      <td>61.1</td>\n",
+              "      <td>4.9671</td>\n",
+              "      <td>2.0</td>\n",
+              "      <td>242.0</td>\n",
+              "      <td>17.8</td>\n",
+              "      <td>392.83</td>\n",
+              "      <td>4.03</td>\n",
+              "      <td>34.7</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>0.03237</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>2.18</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.458</td>\n",
+              "      <td>6.998</td>\n",
+              "      <td>45.8</td>\n",
+              "      <td>6.0622</td>\n",
+              "      <td>3.0</td>\n",
+              "      <td>222.0</td>\n",
+              "      <td>18.7</td>\n",
+              "      <td>394.63</td>\n",
+              "      <td>2.94</td>\n",
+              "      <td>33.4</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>0.06905</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>2.18</td>\n",
+              "      <td>0.0</td>\n",
+              "      <td>0.458</td>\n",
+              "      <td>7.147</td>\n",
+              "      <td>54.2</td>\n",
+              "      <td>6.0622</td>\n",
+              "      <td>3.0</td>\n",
+              "      <td>222.0</td>\n",
+              "      <td>18.7</td>\n",
+              "      <td>396.90</td>\n",
+              "      <td>5.33</td>\n",
+              "      <td>36.2</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "      CRIM    ZN  INDUS  CHAS    NOX  ...    TAX  PTRATIO       B  LSTAT  PRICE\n",
+              "0  0.00632  18.0   2.31   0.0  0.538  ...  296.0     15.3  396.90   4.98   24.0\n",
+              "1  0.02731   0.0   7.07   0.0  0.469  ...  242.0     17.8  396.90   9.14   21.6\n",
+              "2  0.02729   0.0   7.07   0.0  0.469  ...  242.0     17.8  392.83   4.03   34.7\n",
+              "3  0.03237   0.0   2.18   0.0  0.458  ...  222.0     18.7  394.63   2.94   33.4\n",
+              "4  0.06905   0.0   2.18   0.0  0.458  ...  222.0     18.7  396.90   5.33   36.2\n",
+              "\n",
+              "[5 rows x 14 columns]"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 5
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "r2qrTxo4ljZp",
+        "colab_type": "text"
+      },
+      "source": [
+        "### Split Train from Test\n",
+        "- For basic Linear Regression, we will predict `PRICE` (Median value of owner-occupied homes) based on `TAX` (full-value property-tax rate per $10,000)\n",
+        "  - Go ahead and trim data to just these columns"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "spaDB10E3okF",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# simple linear regression X and Y\n",
+        "X = bos['TAX']\n",
+        "Y = bos['PRICE']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4TKLv8FjIBuI",
+        "colab_type": "text"
+      },
+      "source": [
+        "We can now set training and testing sets for our model\n",
+        "- Use `cuML`'s `train_test_split` to do this\n",
+        "  - Train on 70% of data\n",
+        "  - Test on 30% of data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "1DC6FHsNIKH_",
+        "colab_type": "code",
+        "outputId": "4c932268-7a82-4ac3-c7b9-9966ffc2b12e",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 86
+        }
+      },
+      "source": [
+        "from cuml.preprocessing.model_selection import train_test_split\n",
+        "\n",
+        "# train/test split (70:30)\n",
+        "sX_train, sX_test, sY_train, sY_test = train_test_split(X, Y, train_size = 0.7)\n",
+        "\n",
+        "# see what it looks like\n",
+        "print(sX_train.shape)\n",
+        "print(sX_test.shape)\n",
+        "print(sY_train.shape)\n",
+        "print(sY_test.shape)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "(354,)\n",
+            "(152,)\n",
+            "(354,)\n",
+            "(152,)\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ZLVg44gAmJG7",
+        "colab_type": "text"
+      },
+      "source": [
+        "### Predict Values\n",
+        "1. fit the model with `TAX` (*sX_train*) and corresponding `PRICE` (*sY_train*) values \n",
+        "  - so it can build an understanding of their relationship \n",
+        "2. predict `PRICE` (*sY_pred*) for a test set of `TAX` (*sX_test*) values\n",
+        "  - and compare the `PRICE` predictions to the actual median house values (*sY_test*)\n",
+        "    - use `sklearn`'s `mean_squared_error` to do this"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ZGMPloJxGtK3",
+        "colab_type": "code",
+        "outputId": "664b54fe-16d5-4140-a657-3dc782574da9",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "from cuml import LinearRegression\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "\n",
+        "# call Linear Regression model\n",
+        "slr = LinearRegression()\n",
+        "\n",
+        "# train the model\n",
+        "slr.fit(sX_train, sY_train)\n",
+        "\n",
+        "# make predictions for test X values\n",
+        "sY_pred = slr.predict(sX_test)\n",
+        "\n",
+        "# calculate error\n",
+        "mse = mean_squared_error(sY_test, sY_pred)\n",
+        "print(mse)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "54.32312606491228\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T7BXjkPSGwqd",
+        "colab_type": "text"
+      },
+      "source": [
+        "3. visualize prediction accuracy with `matplotlib`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pp9RNPt_Iemk",
+        "colab_type": "code",
+        "outputId": "22a22472-50ad-4bb3-d104-35e9e100b8b6",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 305
+        }
+      },
+      "source": [
+        "import cupy\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "# scatter actual and predicted results\n",
+        "plt.scatter(sY_test, sY_pred)\n",
+        "\n",
+        "# label graph\n",
+        "plt.xlabel(\"Actual Prices: $Y_i$\")\n",
+        "plt.ylabel(\"Predicted prices: $\\hat{Y}_i$\")\n",
+        "plt.title(\"Prices vs Predicted prices: $Y_i$ vs $\\hat{Y}_i$\")\n",
+        "\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEgCAYAAACq+TSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu4XHV97/H3J5sNbi5lQwkom8Qo\nWiiIEAiCgqeAF1REU7yVI5ba1jxaTytesGCtBCsVjULxVJ82p1DFogeRkNLjBaliFRVoQoIhBGqV\n6wa5B4JsYCf5nj9mzWb2ZGbNzJo1M2tmPq/nyZOZNWvW+s6a2eu71u+qiMDMzKyeOb0OwMzMis2J\nwszMUjlRmJlZKicKMzNL5URhZmapnCjMzCyVE4WZmaVyojCzgSXpDZLe0Os4+p3c4c7MBpGkPYDv\nJU9fExEP9zKefuZEYWYDSdIXgSuAEeBNEfH+HofUt5wozMwslesozMwslROFIWm9pGN6HUdRSPqy\npE8lj7tybCr3mdP2/J1abpwoBpCkOyRNSXpC0v3JSWjneutHxIER8cMuhtiWVj9fO5o9NklMr+5E\nDFn08juVtJukTdWJStJXJa2QpEHc9yBzohhcJ0bEzsChwCLg49UrSNqu61HlZ9A/XyZF+MwR8Siw\nHDitvEzSXwO/C5wSHawY7eW+B5kTxYCLiEngO8BLYObK9y8l/Rz4jaTtKq+GJc1LrrwelPSwpL8v\nb0vS3pIuT167XdJfVLz2l5Imk6u52yS9qjqWZJ1vVi27QNIXmt1Gxs9XN+7kPQsl3Zjs91LgORWv\nzbpTqHV8JH0VmA/8W3KX89EmjlfdfdaSxHGmpFskPSrpnyU9J+UzN4y7UYxZv5PEecDxkl4o6W3A\nEkotj55M+Yx5/T5a3rc1EBH+N2D/gDuAVyeP5wHrgb+peG1tsnyscn1KzQhvAs4HdqJ08jo6WWcO\nsBr4BLA98ELgV8DxwH7A3cDeyboLgH1rxPV84Elgl+T5CHAfcGSz22j186XFnay/PXAn8EFgFHgr\nMA18qsa+0o7PzHpNHK/UfaZ85puTz7U78JOqGGt+p2lxN3Fs6n4nwJeALzX4HV4EfBd4EDi0id9t\nLr+PLPv2vwbfTa8D8L8OfKmlk8QTwMbkhPSlqhPIH9dY/9XAy5M/rO1qbPMI4K6qZWcC/wy8CHgg\n2cZog9iuBf4wefwa4JfJ41a20fTnS4s7efw/gHtJmoony35K7USRdnxm1mvieKXuM+Uzv7fi+Rsq\njl3d7zQt7iaOTdPfSZ2YXwIE8Paq5f8LeHGnfh9Z9+1/9f/1vDzTOmZxRPx7ndfurrN8HnBnRGyu\n8drzgb0lbaxYNgL8OCL+W9JpwFLgQElXAR+KiHtrbOdrwMnAxcD/TJ7T4jZa+Xx1404e7w1MRnIW\nSdxZZ7tpx6da2n5b2Welys91Z7KdWq9Vqxd36rHJ8J1U2x54GlhRuTAi/r726kB+v48s+7Y6XEcx\nnOpV6N0NzK9TIXo3cHtEjFf82yUi3gAQEV+LiKMpnXwC+EydfVwGHCNpH+D3SU4ELW6jkcrPlxo3\npaKNiarWMPPrbDft+FQf07T9trLPSvOq1q88SaZV0taLu9Gxafc7ORi4uTpBSfphynvy+n1k2bfV\n4URhlW6gdBI7V9JOkp4j6aiK1zYlFYpjkkYkvUTS4ZL2k3ScpB2Ap4ApYGutHUTEg8APKRXB3B4R\nGwBa2UaGz1Qz7uT1nwGbgb+QNCrpJOBlKduqd3zup1TG38x+W9lnpfdL2kfS7sBfAZe2cAxqxZ16\nbHL4Tg6hVHcyQ6Xxlx6o94Ycfx8t79vqc6KwGRGxBTiRUnnwXcA9wDsqXnsjpT/A24GHgH8CdgV2\nAM5Nlv0a2JNSWXc9X6NU1vy
1imWtbqOVz1QvbiLiGeAk4I+ARyh93hUp26p5fIBPAx+XtFHSR9L2\n28o+q3yN0iB3vwJ+CTTVQa9e3I2ODSnfiaR/kPQPDXZ9MFUna+ClwLoG78vj95F131aDx3oy6wOS\n7gD+NKVepi8k9Qx3RMTKYdp3v/MdhZl100HAz4dw333NrZ7MrGsi4k+Gcd/9zkVPZmaWykVPZmaW\nyonCzMxSDUQdxR577BELFizodRhmZn1l9erVD0XE3EbrDUSiWLBgAatWrep1GGZmfUVSM0PHuOjJ\nzMzSOVGYmVkqJwozM0vVs0SRzLh1TTJj13pJH0iWHyLpOklrJa2S1MxgaWZm1iG9rMzeDHw4Im6U\ntAuwWtLVwGeBsyPiO5LekDw/podxmpkNtZ4lioi4j9LQx0TEJkkbgAlK48z/VrLarswec9/63Mo1\nkyy76jbu3TjF3uNjnH78fixeOJF5PTPrvEI0j5W0AFgIXA+cBlwl6XOUisZe0bvILE8r10xy5op1\nTE1vAWBy4xRnriiN+lyZBJpdz8y6o+eJQtLOwOXAaRHxuKRPAR+MiMslvR24kNLY9NXvWwIsAZg/\nv5nJwazXll1128zJv2xqegvLrrptVgJodj0rhrS7v1qvAS3fLfoOs7d6migkjVJKEpdERHnillOB\nDySPL6M0kco2ImI5sBxg0aJFHtmwD9y7caqp5c2uZ72XdvcHbPPa6ZfdBILpLbHN+vVO/L7D7L1e\ntnoSpbuFDRFxXsVL9wK/lzw+DvhFt2Ozzth7fKyp5c2uZ72XdvdX67XprTGTJKrXz7IP645e9qM4\nCngXcFzSFHZt0srpPcDnJd0E/C1J8ZL1v9OP34+x0ZFZy8ZGR2aKI1pdz3ov7e6vlTvAtHV9h9l7\nvWz1dC2gOi8f1s1YrDvKxQSNypqbXc96b+/xMSZrnLDLd3+1Xqu3naz7sM7reWW2DZfFCyeaOuE3\nu5711unH7zer/gBm3/1VvzY6R7PqKKrXz7IP6zwnCjPLrJm7v3ZbPfkOs/cGYirURYsWhYcZNzNr\njaTVEbGo0Xq+ozCzprk/w3ByojCzprg/w/ByojDrkiJdjWeJxT3mh5cThXVdkU6Y3YqpSFfjWWOp\n19TV/RkGnycusq4qn6QmN04RPHuSWrlmcqBjKlLv4iyxrFwzWbfTk/szDD4nCuuqIp0wy7oRU5F6\nF2eJZdlVt1GrfaTA/RmGgBOFdVWRTpiN9p1nTEUavypLLPWOReCK7GHgRGFdVaQTZqN95xlTkcav\nyhJLvWMx4WKnoeBEYV1VpBNmWTdiWrxwgk+fdBAT42OI0gn20ycd1JOr8SyxFPF7s+5xz2zrumFs\n9TQIfIwGT7M9s50orLB6eWLySdHHYBh4CA/ra73sd5D3vqtPuMfuP5drbn2w0CfgPI9BPyacfoy5\nk3xHYYV01Lk/qNnBa2J8jJ+ccVzb2087EbS670ZzRlcPkV1tbHSkZ/UV9eR1/Gt9/iJ+3kr9GHNW\nzd5RuDLbCqmTTVYbdbBrZd+NtlWrj0a1XvcjqSWv41/EfjON9GPMneZEYYXUySarjU4Erey70baa\nPbEWbRiMvI5/EfvNNNKPMXeaE4UVUiebYzY6EbSy70bbavbEWrRhMPI6/kXsN9NIP8bcaU4UVkid\n7HfQ6ETQyr4bbavWCbdaEfsj5HX8+7H/RT/G3GmuzLahk2dlZTPb6sdWT3nqxxZE/RhzFu5HYZYi\nzxNBN08qw3ICs+5wojAbMLXuXkRpYL4JJw3LoPDNYyXNk3SNpFskrZf0gYrX/lzSrcnyz/YqRrMi\nqdXCqnyZV4R5PWxw9bJn9mbgwxFxo6RdgNWSrgb2At4MHBwRT0vas4cxWh8btGKaRs0zPS2pdUrP\nEkVE3AfclzzeJGkDMAG8Bzg3Ip5OXnugVzEW1aCdADuhSFOP5mXv8bG605GWDXNbf+ucQoz1J
GkB\nsBC4HlgGvFLSOcBTwEci4j97F12xDOIJsBPqdYQ77dK1LL1yPRJsfHK6bqLtRDJud5unH79fw+FA\nhrmtf158IbatnicKSTsDlwOnRcTjkrYDdgeOBA4HviHphVFV6y5pCbAEYP78+V2OunfSegIP+4+5\nUtqV9cap6ZnH5US76s5HZpqsju84yhNPbWZ6a8xaB7In4zwSfHm9ZVfdxuTGqZmK7LJhb+ufB1+I\n1dbTDneSRikliUsiYkWy+B5gRZTcAGwF9qh+b0Qsj4hFEbFo7ty53Qu6xzy8QHNaubKemt7CJdfd\nNTNe06NPTs8kicp12hnrZ+mV63MZP2jxwgl+csZx3HHuCZz/jkMKMRHSIPE4T7X17I5CkoALgQ0R\ncV7FSyuBY4FrJP0OsD3wUA9CLKR65dSdLHLo9K14J7bfTDFNpWYaiWdNxivXTM66i8ljm1BKGvWO\nk4tPsvGFWG29LHo6CngXsE7S2mTZx4CLgIsk3Qw8A5xaXew0zGqdADtZ5NDpW/G07QOZT3bVxTR5\nyJqM065GO5HgXXySXS8uxPqBO9z1oW5eLXZ6Xoh62x8fG+XpzVs7NsxGq9qZj+AFZ3wr9Y4l785y\nnf7OBtkwzUUBnuFuoKUVOeSt07fi9bZTq6imUaV9vQRaeXdxb1IP0cjoiNhp++14bKp+y6hmNWrW\nmvcVv4tPsqv+rbjYrsSJwlJ1+la8mb4Bleqd7BoVYZ39b+t59Mna9QS17LT9dix904G5nCCaqS/J\ns+Wai0/a080LsX7hYcYtVaeHXK63/d12HK25fr2TXb3WKkuvXM/p37yppSQBpTuavIbEqB6yu568\nrvg9TLblzYnCUnVyXoh623/LYRPUqjpLO9mlFWFNb8lWD5dns8hys9bbzz2BiQ5PjNPp78yGjyuz\nrS15V6zXq3jebcdRzjpxdlFQ5b7nSGzpwG9ZwO3nnpDrNoetwtSKy5XZ1nGdaIZZqwgJYMftt9sm\nSZx+2U0zHeNqJYnROaXksbWJ/FHdy7msE+X6rjC1fuNEMQTKV96TG6cYSa68JxrMtNbMnULaeErL\nrrqt6ZNfZXz1VBctLb1y/Ta9p4GZOoBdx0b5zTOb2bq14e6B2kmik+X6eVeY9kMHu36IMW+d/Mzd\nPJ4uehpwrfQhKF9V71Y11hE8WzQCzTczrVWcUmta0MtXTzaMr7oPwIIzvlV33TvOPaFuX4JmjY+N\nZmr11IuTYT8UZfVDjHnr5GfOa9uFn7jIuqNeUU4t5RN/vbGOzv639Zy5Yt3MmEiNVFcGl3/c5fdP\nbpzikuvuahhfliv7dlsQ7bTDdpmSRPXn68ZkQv0wPlE/xJi3Tn7mbh9PJ4oBl2cnq0efnG65d3Pl\n/tNmaEtT6yqpXvPZOSqdsNutW8hy3Hp1MuyHDnb9EGPeOvmZu308nSgGXK87WVXuP8uPeGJ8rOaV\n/VknHsjoyLa9ErYGnLliHcfuP3ebvgStyHLcenUyrBdrr7/7Sv0QY946+Zm7fTydKAZcrc5XWYyN\njjA+VvsqfmJ8jL97xyENO3nV+xHX64SWVuS0eOEEy956MCPa9t1T01u45tYHt+lLcMqR82eej4+N\n1kw0jfabplcnw37oYNcPMeatk5+528fTrZ4GXPUoqrVaPdWaBKfWWEdA3ZFrm2nyWW/k27ccNjET\nR2V8jSqCFy+c4IOXrq352r0bpxq2LKrXGixrBXS3R/Yt64fmtv0QY946+Zm7fTzd6smA5lvrtNuq\nJ+9WQUUbKXUYm4Ba/2q21ZMTheWq2yfKXjW77Hb7eMjn6jHvuyjrb04U1nWDeNKut79uto8fnSMQ\ns8asyrK/tD41g96nwWpzorCuq1cMNCKxNaIQRVV56GRxVysdBVvdX6Nte2Kj4dO1sZ4kfTLZzlpg\nbUT8V7vbtP5UrxloeRymWmNBpSWCTowllUfi6UX7+Dz21
2j9Qe7TYO1pqXmspFOql0XEJ4ALgMeA\n35f0f3KKzfpMM81AKzugNerJnHcHtrx6TveifXwe+2u0/iD3abD2tNqP4l2SLpA0qwFvRNwfEVdF\nxGci4j05xmd9pNk+G+Ur10aJIO8r97wST7fbx4/O0TZ9PrLsL+37GfQ+Ddae1EQh6UBJl1Qsej0w\nBfxA0tyORmZ9p3rCnFqd4eDZK9dGiaDZK/eVayY56twf8IIzvsVR5/6g7h1CXomnkxMD1dr2srcd\nzLK3Htz2/iq3Dc9+P57YyBppVEfx78DLy08iYitwhqSTgB9LOo9S3cTNEfFk58K0flHZya1e66Dy\nlWujuZ2b6cDWSj1GnnNJd3Je5XrbzisROSFYqxoVPb0WOKdygaQ3An8KPAMcCnwOuFvSf3ckQutb\nja68GxXhNHPl3kpx0jAOI2GWh9Q7iohYB7yz/FzS7cAtwPkRcXXlupL2aWXHkuYBFwN7URo9YnlE\nXFDx+ocpJaG5EfFQK9u27kprSZR2BdvMMASNroBbKU4axmEkzPLQavPY10fErbVeiIh7WtzWZuDD\nEXGjpF2A1ZKujohbkiTyWuCuFrdpXdZuE9Z2i0JaLU5y0YtZ61pq9VQvSWQREfdFxI3J403ABqD8\nF3w+8FGam67AeqjXE9LUayX05DObG1ZuF0WzlfFmvVKI0WMlLQAWAtdLejMwGRE3qU6rGSuOXk9I\nU12cVJ4r+9Enp4F8Oul1Uic6FZrlrefzUUjaGbgcOI1ScdTHgE808b4lklZJWvXggw92OEqrpwgT\n0ixeOMFPzjiO2889gZ122G7WmEhQ7Ck3e31HZtaMniYKSaOUksQlEbEC2Bd4AXCTpDuAfYAbJT23\n+r0RsTwiFkXEorlz3aWjV4rWkqjXdzit6rd4bThlThTVJ+9aJ/MG7xdwIbAhIs6DUiuriNgzIhZE\nxALgHuDQiPh11jitszrZ+SyLItzhtKLf4rXh1E4dxYXACSnPGzkKeBewTlJ5mrKPRcS324jJeqBI\nLYl6NctcVv0Wrw2nzIkiIk5Ie97E+6+l/nTJ5XUWtB6ZDbN+6yvRb/HacMo0H4WktwHfjYhNkj5O\nqYf230TEmrwDbIbnozAza12z81FkraP46yRJHA28mlKx0z9k3JaZmRVY1kRRLlA9gdLQG98Cts8n\nJDMzK5KsiWJS0j8CfwB8W9IObWzLzMwKLOvJ/e3AVcBrI2IjsDtwem5RmZlZYWRt9TQF7AScDHwS\nGAU25hWU9ad25qPOYy5rM+uMrHcUXwKOpJQoADYBX8wlIutL7cxHnddc1mbWGVkTxRER8X7gKYCI\neBRXZg+1dsYs8nhHZsWWtehpWtIIyTDgyfzZW3OLyvpOO2MW1VtncuMUK9dMDmUR1CAU4xUlDmtf\n1kTxBeAKYE9J5wBvBT6eW1TWd9qZj7ree4GhHHK7naHHOzFseZYTvodPHyyZEkVEXCJpNfAqSsNw\nLI6IDblGZn3l2P3ncsl1d82aaaqZMYtWrpnkyWc21329XARVfXJp92q18v3jO44SAY9NTbd15ZvX\nFXRaUVyj7bXz3lqynvDzjsN6q52xnm4FcpvxzvrXyjWTXL56claSEPCWw9IHC6w+CdVTXTTV7tVq\n9fvLkxxl2VZeMVXqRDFe1mHLs57wPXz6YMlUmS3pK5LGK57vJumi/MKyflLrZBLANbemTyhV6321\nVBdftVv53Wi/WSrS86yQb2fo8byHLc96wvfw6YMla6unlyYd7YCZVk8L8wnJ+k0zJ5Na80I3c3VZ\nq/iq3avVdq7MW10/yxV0O5NB5T2RVNYTftEmtLL2ZE0UcyTtVn4iaXcKMv+2dV+jk0m9fhLjO47W\nfN+IlDoJUrtXq+1cmbe6fpYr6HYmg8p7IqmsJ/yiTWhl7cl6cv888DNJlyXP3wack09I1m8aTb5T\nr1hmh+3mMDY6ss37G
p1Q2p3sp9b7KylZpxV5T0DUzmRQeU4k1c58GUWa0Mrak7XV08VJq6djk0Un\nRcQt+YVl/aTRyaRe8ctjU9Oc/45DWj4JtTvZT3m90y5dW/P1oPUK6EGegMgnfMs0cVHReOKiYjvq\n3B/U7CcxMT7GT844rgcRlRQ1LrNu6cjERZKuTf7fJOnxin+bJD2eNVgbbEWt2CxqXGZF01LRU0Qc\nLUnAgRFxV4disgFT1GKZosZlVjRZ58xeFxEHdSCeTFz0ZGbWumaLnrK2erpR0uER8Z8Z329Dot6w\nFh4wzqx/ZE0URwCnSLoD+A2lFoURES/NKzDrf/WGtVh15yNcvnrSA8aZ9YmsieL4dncsaR5wMbAX\npRaJyyPiAknLgBOBZ4BfAu+u7AVu2ZWv4ic3TpUye7J8tx1HOevEA9s6SVffIRy7/1y+fv3dbKkq\n2pya3lJ3uQeMMyumrD2z7wfeApwPnAeclCxrxWbgwxFxAKXZ8t4v6QDgauAlyd3JfwFnZozRKlT2\njgZmDeD36JPTnP7NmzLPKFer5/W/XHfXNsmgrN7yekONm1lvZU0UFwMHAv8b+HvgAOCrrWwgIu6L\niBuTx5uADcBERHwvIsrjTl8H7JMxRqvQaCC86S2ReUa5Zgf3a2REansbZpa/rEVPL0nuBMqukZS5\nZ7akBZQGFby+6qU/Bi6t854lwBKA+fPnZ9310OjEQHhZ3lc9ZEelencaZtZbWe8obpR0ZPmJpCOA\nTO1TJe0MXA6cFhGPVyz/K0rFU5fUel9ELI+IRRGxaO7cuVl2PVQ6MRBeq+8bkWYGiqul3nIz662s\nieIw4KeS7khaPv0MOFzSOkk/b3YjkkYpJYlLImJFxfI/At4IvDMGYYyRAqjVC7nS6Igy90hutG0o\n3Ul8/u0Hs3jhhHtEm/WZrEVPr2t3x0kP7wuBDRFxXsXy1wEfBX4vIp5sdz9WUtkLOe9WT7V6OB+7\n/1yuufXBmv0k3CParL/0bFBASUcDPwbWAVuTxR8DvgDsADycLLsuIt6bti33zDYza12ne2a3LSKu\npdRRr9q3ux2LmZnVl7WOwszMhoQThZmZpWqp6EnSh9Jer6yUNjOzwdBqHcUuyf/7AYcDVybPTwRu\nyCsoMzMrjlYnLjobQNKPgEOToTeQtBT4Vu7RmZlZz2Wto9iL0uiuZc8ky8zMbMBkbR57MXCDpCuS\n54uBr+QTkpmZFUmmRBER50j6DvDKZNG7I2JNfmGZmVlRZCp6SobfOADYNSIuAB6W9LJcIzMzs0LI\nWkfxJeDlwMnJ803AF3OJyMzMCiXznNkRcaikNQAR8aik7XOMy8zMCiLrHcW0pBGSAUglzeXZgf3M\nzGyAZE0UXwCuAPaUdA5wLfDp3KIyM7PCyNrq6RJJq4FXURoBdnFEbMg1MjMzK4RMiULSZyLiL4Fb\naywzM7MBkrXo6TU1lr2+nUDMzKyYWh099n3AnwH7Vs2NvQvw0zwDMzOzYmi16OlrwHcoVVyfUbF8\nU0Q8kltUZmZWGC0VPUXEYxFxB6VBAB+LiDsj4k4gJF3UiQDNzKy3stZRvDQiNpafRMSjwMJ8QjIz\nsyLJmijmSNqt/ETS7mTv5W1mZgWW9eT+eeBnki5Lnr8NOCefkMzMrEiydri7OOlwd2yy6KSIuCW/\nsMzMrCgyFxdFxHpgfdb3S5pHaQKkvSiNGbU8Ii5IirEuBRYAdwBvT+pAzMysB1qqo5B0bfL/JkmP\nV/zbJOnxFve9GfhwRBwAHAm8X9IBlJrdfj8iXgx8n9nNcM3MrMtauqOIiKOT/3dpd8cRcR9wX/J4\nk6QNwATwZuCYZLWvAD8EPDSImVmPtNoz+0Npr0fEeVmCkLSAUvPa64G9kiQC8GtKRVO13rMEWAIw\nf/78LLs1M7MmtNo8dpfk3yLgfZTuACaA9wKHZglA0s7A5cBpETGr+CoigmTOi2oRsTw
iFkXEorlz\n52bZtZmZNaHVoqezAST9CDg0IjYlz5cC32p155JGKSWJSyJiRbL4fknPi4j7JD0PeKDV7ZqZWX6y\ndrjbi9IwHmXPUKeIqB5JAi4ENlQVWV0JnJo8PhX414wxmplZDrI2j70YuEHSFcnzxZQqnltxFPAu\nYJ2ktcmyjwHnAt+Q9CfAncDbM8ZoZmY5yNrh7hxJ3wFemSx6d0SsaXEb11KaHa+WV2WJy8zM8pep\n6CkpNjoA2DUiLgAelvSyXCMzM7NCyFpH8SXg5cDJyfNNwBdzicjMzAolax3FERFxqKQ1UBpmXNL2\nOcZlZmYFkfWOYlrSCEkfB0lzga25RWVmZoWRNVF8AbgC2FPSOcC1wN/mFpWZmRVGy0VPSUX2j4DV\nlFonCVgcERtyjs3MzAqg5UQRESHp2xFxEHBrB2IyM7MCyVr0dKOkw3ONxMzMCilzqyfgFEl3AL+h\nVPwUEfHSvAIzM7NiyJoojs81CjMzK6xW56N4DqUhxV8ErAMujIjNnQjMzMyKodU6iq9QmotiHfB6\n4PO5R2RmZoXSatHTAUlrJyRdCNyQf0hmZlYkrd5RTJcfuMjJzGw4tHpHcbCk8nSlAsaS5+VWT7+V\na3RmZtZzrU6FOtKpQMzMrJiydrgzM7Mh4URhZmapnCjMzCyVE4WZmaVyojAzs1ROFGZmlsqJwszM\nUvU0UUi6SNIDkm6uWHaIpOskrZW0StLLehmjmdmw6/UdxZeB11Ut+yxwdkQcAnwieW5mZj3S00QR\nET8CHqleDJSHAtkVuLerQZmZ2SxZJy7qpNOAqyR9jlIie0WtlSQtAZYAzJ8/v3vRmZkNmV4XPdXy\nPuCDETEP+CBwYa2VImJ5RCyKiEVz587taoBmZsOkiIniVGBF8vgywJXZZmY9VMREcS/we8nj44Bf\n9DAWM7Oh19M6CklfB44B9pB0D3AW8B7gAknbAU+R1EOYmVlv9DRRRMTJdV46rKuBmJlZXUUsejIz\nswJxojAzs1ROFGZmlsqJwszMUjlRmJlZKicKMzNL5URhZmapnCjMzCyVE4WZmaVyojAzs1ROFGZm\nlsqJwszMUjlRmJlZqiJOhdoVK9dMsuyq27h34xR7j49x+vH7Acxaduz+c7nm1gdnrbN44cTMeyc3\nTs1sb0Ti5CPm8anFB/Hxlev4+vV3syVi5vWJqvcvvXI9G6emAdhtx1HOOvHAbbY9R7A12YQoTSY+\nPjaKBI8+Oc2IxJaImf/HRufw9OatbI1SPEe+cDfueHhqm89Yve8DnrcLP/3VI1SEy/jYKEvfVIoJ\nqPmZKh217+5c8p6XZzrW4zuOEgEbp7b9TBM1vod630va/ipfb0ZRtmFWTzd/X4o6f/j9ZNGiRbFq\n1aqm11+5ZpIzV6xjanrLzLLkIdRbAAAKXUlEQVTREUHA9Nb6x2NsdIS3HDbB5asnZ7230ov33Ilf\nPPCb1PdfesPd2+xndES84/B5qdtu1+ic0sk35SNus/6ytx3Mqjsf4V+uu6vh+rWSRdZj3Yqx0RE+\nfdJBM4m2en+VrzejKNswqyev35ek1RGxqNF6Q1n0tOyq27Y5GU9viYYnrqnpLXz9+rtTT+T1kkTl\n+2vtZ3pLNNx2u6a3Np8kyusvu+o2vn793U2t/5NfPrLNsqzHuhVT01tYdtVtdfdX+XozirINs3q6\n/fsayqKneyuKjFpVr+glj/e3u+1OuHfjFO1E1c6xzrKfevtrJY6ibMOsnm7/vobyjmLv8bHM7x2R\n2tp32vvb3XYn7D0+1lZc7RzrLPupt79W4ijKNszq6fbvaygTxenH78fY6MisZaMjYnRO+glxbHSE\nk4+Yt817K714z50avr/WfkZH1HDb7RqdIxp8xG3WP/34/Tj5iHlNrX/UvrtvsyzrsW7F2OjITAV5\nrf1Vvt6MomzDrJ5u/75Gli5d2pENd9Py5cuXLlm
ypOn193/eb7HPbmOsm3yMJ57azMT4GEvfdCCv\nPfC5s5a9+ZC9efiJZ2aef+LEA/izY180895NT22e2eaIxDuPnM+X330EDz3xNOsnH59VZFP5/vm7\n78h1v3qYpzZvBUotj875/YO22facUp0vUGr1BKXWSGPbj/DU9FZGJCLZdwBjo3PYGjGz7BX77s7W\nYNZnPP7A526z78Pmj3NP1S3r+Ngof5tUjB23/141P1Oleq2emjnWu+04ynO2G+Gpzdt+plrfQ63v\npVyBV2t/la9n/X30Yhtm9eT1+zr77LPvW7p06fJG6w1lqyczM3OrJzMzy4kThZmZpeppopB0kaQH\nJN1ctfzPJd0qab2kz/YqPjMz6/0dxZeB11UukHQs8Gbg4Ig4EPhcD+IyM7NETxNFRPwIqO7O+z7g\n3Ih4Olnnga4HZmZmM3p9R1HL7wCvlHS9pP+QdHitlSQtkbRK0qoHH3ywyyGamQ2PIiaK7YDdgSOB\n04FvSNt2DY6I5RGxKCIWzZ07t9sxmpkNjSIminuAFVFyA7AV2KPHMZmZDa0iJoqVwLEAkn4H2B54\nqKcRmZkNsZ6OHivp68AxwB6S7gHOAi4CLkqazD4DnBqD0H28hzyBjpm1o6eJIiJOrvPSKV0NZIBV\nT3AyuXGKM1esA3CyMLOmFLHoyXLkCXTMrF1OFAPOE+iYWbucKAacJ9Axs3Y5UQw4T6BjZu0ayjmz\nh0m5wtqtnswsKyeKIbB44YQTg5ll5qInMzNL5URhZmapnCjMzCyVE4WZmaVyojAzs1QahPH2JD0I\n3NnrONq0Bx4lt5KPx2w+Hs/ysZitnePx/IhoOKHPQCSKQSBpVUQs6nUcReHjMZuPx7N8LGbrxvFw\n0ZOZmaVyojAzs1ROFMWxvNcBFIyPx2w+Hs/ysZit48fDdRRmZpbKdxRmZpbKicLMzFI5UfSApIsk\nPSDp5oplu0u6WtIvkv9362WM3SJpnqRrJN0iab2kDyTLh/V4PEfSDZJuSo7H2cnyF0i6XtJ/S7pU\n0va9jrVbJI1IWiPp/yXPh/lY3CFpnaS1klYlyzr+t+JE0RtfBl5XtewM4PsR8WLg+8nzYbAZ+HBE\nHAAcCbxf0gEM7/F4GjguIg4GDgFeJ+lI4DPA+RHxIuBR4E96GGO3fQDYUPF8mI8FwLERcUhF34mO\n/604UfRARPwIeKRq8ZuBrySPvwIs7mpQPRIR90XEjcnjTZROCBMM7/GIiHgieTqa/AvgOOCbyfKh\nOR6S9gFOAP4peS6G9Fik6PjfihNFcewVEfclj38N7NXLYHpB0gJgIXA9Q3w8kqKWtcADwNXAL4GN\nEbE5WeUeSsl0GPwd8FFga/L8txneYwGli4bvSVotaUmyrON/K57hroAiIiQNVbtlSTsDlwOnRcTj\npQvHkmE7HhGxBThE0jhwBbB/j0PqCUlvBB6IiNWSjul1PAVxdERMStoTuFrSrZUvdupvxXcUxXG/\npOcBJP8/0ON4ukbSKKUkcUlErEgWD+3xKIuIjcA1wMuBcUnlC7t9gMmeBdY9RwFvknQH8H8pFTld\nwHAeCwAiYjL5/wFKFxEvowt/K04UxXElcGry+FTgX3sYS9ckZc4XAhsi4ryKl4b1eMxN7iSQNAa8\nhlK9zTXAW5PVhuJ4RMSZEbFPRCwA/gD4QUS8kyE8FgCSdpK0S/kx8FrgZrrwt+Ke2T0g6evAMZSG\nB74fOAtYCXwDmE9pyPS3R0R1hffAkXQ08GNgHc+WQ3+MUj3FMB6Pl1KqkByhdCH3jYj4pKQXUrqq\n3h1YA5wSEU/3LtLuSoqePhIRbxzWY5F87iuSp9sBX4uIcyT9Nh3+W3GiMDOzVC56MjOzVE4UZmaW\nyonCzMxSOVGYmVkqJwozM0vlRGFmZqmcKGxgSFosKSSlDnkhaVzSn7W5ryfqLN+SDAF9s6TLJO1Y\nZ72ftrP/Zkn
63WRo6jnJ8xFJ35P0h93Yvw0GJwobJCcD1yb/pxkH2koUKaaSIaBfAjwDvLfyRZXM\niYhXdGj/s0TEBko9u9+YLDoHuC0iLu7G/m0wOFHYQEgGFTya0twEf1Cx/A8l/TyZCOiryeJzgX2T\nK/9lkhZUTSL1EUlLk8crk5E611eM1tmsHwMvSrZ/m6SLKQ25MK/yjqROjEg6JZnEaK2kf0zuBnaS\n9K1k3ZslvaOJOM4H3ifpLZTGT/pQi5/DhpxHj7VB8WbguxHxX5IelnQY8BTwceAVEfGQpN2Tdc8A\nXhIRh8DM8Ob1/HFEPJKMu/Sfki6PiIcbBZMMWvd64LvJohcDp0bEdcnr5fUOrBWjpN8F3gEcFRHT\nkr4EvBP4DXBvRJyQrLdr8v+3gT+NiHurY4mI70n6PPBp4H9ExHSj+M0q+Y7CBsXJlMb/Ifn/ZEqj\njV4WEQ8BZBz/5i8k3QRcB8yjdMJPM5bMJbEKuIvSgIcAd5aTRJV6Mb4KOIxSclqbPH8hpTGxXiPp\nM5JeGRGPJe97Q60kUeGnwHkR8evyAkl/0+CzmAG+o7ABkFyFHwcclIzFP0JpgpdlTW5iM7Mvmp6T\nbPcY4NXAyyPiSUk/LL+WYqp8p1IRH5TuBFoh4CsRceY2L0iHAm8APiXp+xHxySa2dwDwzxXbeC6l\n2fPMGvIdhQ2CtwJfjYjnR8SCiJgH3A78HHhbMromFUVPm4BdKt5/P7CnpN+WtAPPVvzuCjyaJIn9\nKc3pnbcf1Inx+8BbkwlqkLS7pOdL2ht4MiL+hVIiPLTJ/RxIqX6k7BBgbR4fwAafE4UNgpN5dvjl\nssspVWqfA/xHUnx0HkBSx/CTpDJ4WVJm/0ngBkpTj5ZnDfsusJ2kDZQqwGsVHbUlItbXifEWSnUX\n35P08ySu5wEHATckxVFnAZ+CUh1FkkS2IWkepelDK5v0OlFY0zzMuNkQknQh8J6I2NpwZRt6ThRm\nZpbKRU9mZpbKicLMzFI5UZiZWSonCjMzS+VEYWZmqZwozMwslROFmZmlcqIwM7NUThRmZpbq/wNP\ni6cKUGWQlQAAAABJRU5ErkJggg==\n",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8MqX73B4s5tv",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Multiple Linear Regression \n",
+        "- Our mean squared error for Simple Linear Regression looks fairly high.\n",
+        "  - Let's try Multiple Linear Regression (predicting based on multiple variables rather than just `TAX`) and see if that produces more accurate predictions\n",
+        "\n",
+        "1. Set X to contain all values that are not `PRICE` from the unsplit data\n",
+        "  - i.e. `CRIM`, `ZN`, `INDUS`, `CHAS`, `NOX`, `RM`, `AGE`, `DIS`, `RAD`, `TAX`, `PTRATIO`, `B`, `LSTAT`\n",
+        "  - Set `mY` to still represent just 1 target value (`PRICE`)\n",
+        "    - also from the unsplit data\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ZtQK5-f4M0Vg",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# set X to all variables except price\n",
+        "mX = bos.drop('PRICE', axis=1)\n",
+        "# and, like in the simple Linear Regression, set Y to price\n",
+        "mY = bos['PRICE']"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RTYG4-UwNDsK",
+        "colab_type": "text"
+      },
+      "source": [
+        "2. Split the data into `mX_train`, `mX_test`, `mY_train`, and `mY_test`\n",
+        "  - Use `cuML`'s `train_test_split`\n",
+        "    - And the same 70:30 train:test ratio"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EsKxK8u_F7t8",
+        "colab_type": "code",
+        "outputId": "673a1a44-4d2f-4a45-8333-8f29782eaf65",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 86
+        }
+      },
+      "source": [
+        "# train/test split (70:30)\n",
+        "mX_train, mX_test, mY_train, mY_test = train_test_split(mX, mY, train_size = 0.7)\n",
+        "\n",
+        "# see what it looks like\n",
+        "print(mX_train.shape)\n",
+        "print(mX_test.shape)\n",
+        "print(mY_train.shape)\n",
+        "print(mY_test.shape)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "(354, 13)\n",
+            "(152, 13)\n",
+            "(354,)\n",
+            "(152,)\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_Y40R17LGHsI",
+        "colab_type": "text"
+      },
+      "source": [
+        "3. Fit the model with `mX_train` and the corresponding `PRICE` (`mY_train`) values\n",
+        "  - so it can build an understanding of their relationships\n",
+        "4. Predict `PRICE` for the test set of independent (`mX_test`) values\n",
+        "  - and compare the `PRICE` predictions (`mY_pred`) to the actual median house values (`mY_test`)\n",
+        "    - use `sklearn`'s `mean_squared_error` to do this"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "N7qm1HuVO-1k",
+        "colab_type": "code",
+        "outputId": "7e291cec-e602-4ad9-a5b3-b70d7261f63d",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        }
+      },
+      "source": [
+        "# call Linear Regression model\n",
+        "mlr = LinearRegression()\n",
+        "\n",
+        "# train the model for multiple regression\n",
+        "mlr.fit(mX_train, mY_train)\n",
+        "\n",
+        "# make predictions for test X values\n",
+        "mY_pred = mlr.predict(mX_test)\n",
+        "\n",
+        "# calculate error\n",
+        "mmse = mean_squared_error(mY_test, mY_pred)\n",
+        "print(mmse)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "16.691811854229723\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "jTdmleXCM_Xb",
+        "colab_type": "text"
+      },
+      "source": [
+        "5. Visualize with `matplotlib`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Q83NFMK1JKvL",
+        "colab_type": "code",
+        "outputId": "569cfa77-a66e-4b1b-9d70-ae4ef8e7936e",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 305
+        }
+      },
+      "source": [
+        "# scatter actual and predicted results\n",
+        "plt.scatter(mY_test, mY_pred)\n",
+        "\n",
+        "# label graph\n",
+        "plt.xlabel(\"Actual Prices: $Y_i$\")\n",
+        "plt.ylabel(\"Predicted prices: $\\hat{Y}_i$\")\n",
+        "plt.title(\"Prices vs Predicted prices: $Y_i$ vs $\\hat{Y}_i$\")\n",
+        "\n",
+        "plt.show()"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEgCAYAAACq+TSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X20HXV97/H3J4cTOCBygkYqAYxP\nCyQiCeJTQ7skSqMimiKKXLXc1tbV1q4K0tRwLy1I5RJXqlivWsuqtqBYAwKRVitYE66KRZuYQIhA\nfUDQg0JUjoIc4CT53j/27GRnZ8/sPbNnP39ea5119syeM/Pbc86Z78zv4ftTRGBmZpZmTq8LYGZm\n/c2BwszMMjlQmJlZJgcKMzPL5EBhZmaZHCjMzCyTA4WZmWVyoDCzoSTpNZJe0+tyDAN5wJ2ZDRtJ\nTwVuShZPiYif97I8g86BwsyGjqSPAtcDY8DrIuKdPS7SQHOgMDOzTG6jMDOzTA4UI07SNkkv73U5\n+oWkf5b0vuR1V85N7TFL2p9/p1YqB4ohI+mHkmYkPSLpgeQi9KS07SNiUUTc3MUitiXv52tHq+cm\nKdMrO1GGInr5O5U0T9LD9YFK0qckXSdJw3jsYedAMZxOi4gnAScAJwIX1G8gab+ul6o8w/75CumH\nzxwRDwGXA+dU10n6K+B5wFujg42ivTz2sHOgGGIRMQX8O/B82H3n+x5JtwO/lrRf7d2wpCOTO6/t\nkn4u6SPVfUk6XNK1yXv3SPrzmvfeI2kquZu7W9Ir6suSbPO5unV/J+nDre6j4OdLLXfyM0skfTs5\n7lrggJr39npSaHR+JH0KOAr41+Qp5y9bOF+px2wkKcf5kr4j6SFJ/yTpgIzP3LTczcpY9HeS+CCw\nXNKzJL0ReAeVnkePZnzGsv4+ch/bWhAR/hqiL+CHwCuT10cC24C/qXlvS7J+onZ7Kt0IbwMuAw6i\ncvE6KdlmDrAJ+GtgLvAs4AfAcuBo4EfA4cm2C4FnNyjXM4BHgYOT5THgJ8BLW91H3s+XVe5k+7nA\nvcC5wDhwBjALvK/BsbLOz+7tWjhfmcfM+Mx3JJ/rUOCWujI2/J1mlbuFc5P6OwE+Bnysyd/hJ4Ev\nAduBE1r4uy3l76PIsf3VwnWl1wXwV8m/0MpF4hFgOrkgfazuAvIHDbZ/JfCy5B9rvwb7fAlwX926\n84F/Ap4DPJjsY7xJ2b4O/F7y+hTg+8nrPPto+fNllTt5/dvA/STdxJN136BxoMg6P7u3a+F8ZR4z\n4zP/cc3ya2rOXervNKvcLZybln8nKWV+PhDAm+rW/xnw3E79fRQ9tr+yv3pep2kdsSIi/iPlvR+l\nrD8SuDcidjR47xnA4ZKma9aNAV+LiO9JOge4CFgk6Ubg3RFxf4P9fAY4C7gS+B/JMjn3kefzpZY7\neX04MBXJVSRxb8p+s85Pvazj5jlmrdrPdW+yn0bv1Usrd+a5KfA7qTcXeBy4rnZlRHyk8eZAeX8f\nRY5tGdxGMXrSGvR+BByV0iD6I+CeiJis+To4Il4DEBGfiYiTqFx8Anh/yjGuAV4u6Qjgd0kuBDn3\n0Uzt58ssN5WqjQV1vWGOStlv1vmpP6dZx81zzFpH1m1fe5HMaqRNK3ezc9Pu7+R44I76ACXp5oyf\nKevvo8ixLYMDhVV9i8pFbLWkgyQdIGlpzXsPJw2KE5LGJD1f0oskHS1pmaT9gceAGWBXowNExHbg\nZipVMPdExJ0AefZR4DM1LHfy/n8CO4A/lzQu6XTgxRn7Sjs/D1Cp42/luHmOWeudko6QdCjwv4G1\nOc5Bo3JnnpsSfieLqbSd7KZK/qUH036gxL+P3Me2bA4UBkBE7AROo1IffB/wY+DMmvdeS+Uf8B7g\nZ8A/AocA+wOrk3U/BZ5Gpa4
7zWeo1DV/pmZd3n3k+Uxp5SYingBOB/4n8Asqn/e6jH01PD/ApcAF\nkqYl/UXWcfMcs85nqCS5+wHwfaClAXpp5W52bsj4nUj6uKSPNzn08dRdrIEXAFub/FwZfx9Fj20p\nnOvJrM9J+iHwhxntMgMhaWf4YUSsG6VjDwM/UZhZtxwH3D6Cxx547vVkZl0REW8fxWMPA1c9mZlZ\nJlc9mZlZJgcKMzPLNBRtFE996lNj4cKFvS6GmdlA2bRp088iYn6z7YYiUCxcuJCNGzf2uhhmZgNF\nUivpY1z1ZGZm2RwozMwskwOFmZllcqAwM7NMDhRmZpZpKHo9mZn1g3Wbp1hz493cPz3D4ZMTrFx+\nNCuWLOjIfso6ViscKMzMSrBu8xTnX7eVmdmdAExNz3D+dZXM5nku4K3sp6xjtarnVU/JhCmbJf1b\nsvxMSd+U9D1JayXN7XUZzcyaWXPj3bsv3FUzsztZc+Pdpe+nrGO1queBAngXcGfN8vuByyLiOcBD\ngLM+mlnfu396Jtf6dvZT1rFa1dNAkcyNeyqVmbVI5hFeBnwu2eQKYEVvSmdm1rrDJydyrW9nP2Ud\nq1W9fqL4EPCX7Jn/9inAdM2k6D8GGla4SXqHpI2SNm7fvr3zJTUzy7By+dFMjI/ttW5ifIyVy48u\nfT9lHatVPQsUkl4LPBgRm4r8fERcHhEnRsSJ8+c3zWllZtZRK5Ys4NLTj2PB5AQCFkxOcOnpx+Vu\nXG5lP2Udq1U9m7hI0qXA24AdwAHAk4HrgeXAb0TEDkkvAy6KiOVZ+zrxxBPDSQHNzPKRtCkiTmy2\nXc+eKCLi/Ig4IiIWAm8G1kfEW4ANwBnJZmcDn+9REc3MjN63UTTyHuDdkr5Hpc3iEz0uj5nZSOuL\nAXcRcTNwc/L6B8CLe1keMzPboy8ChZlZv+tmyox+40BhZtZEt1Nm9Jt+bKMwM+sr3U6Z0W8cKMzM\nmuh2yox+40BhZtZEt1Nm9Bu3UZhZYaPSwLty+dF7tVFAZ1NmtMLzUZhZ3xulBt7q5+mXoNjtc+9A\nYTbEOnnXmdXAO2yBAioX4H75XN0+926jMBtS1bvOqekZgj13nes2T5Wy/7SG3KnpGZauXl/acWxf\nIzUfhZl1Tqe7dGY15JYdlGxvozYfhZllWLd5iqWr1/PMVV/IfZfe7I6/yD5rNZoTodYojTPotm7P\nR+E2CrM+1W6D5eGTE0w1CBZK9lVkn7VqG3gbHQdGZ5xBt3W7cb1n81GUyfNR2DBaunp9wwvwgskJ\nblm1rOnP1wcaqASJRv/xre6zU2W13uj7+SjMLFu7DZaNZkFLuy1s986/21Uh1l2uejLrU2lVR3ka\nLGu7dK7bPMW5a7c0DBbtNoL22ziDfjEsAxIdKMz6VNmjgdfceHfDIKHkWO3qp3EG/WCYBiQ6UJj1\nqbLv0tOql4LsC1e7d8XDcled1zANSHSgMOtjZd6lp1VlLciodlq3eYqV19zG7K7Ks8jU9Awrr7lt\nd9maGaa76ryGKeOsG7PNRkSRBueLbti2O0hUze4KLrphW0vHHOV5HIYp46yfKMyGQCvVO0WqsqZn\nZnOtrzdMd9V5dTrjrLPHmlnLGlXvnLt2C+es3cKCugtItxucy+i5Nag62RPM2WPNLJdG1TvVyqJ2\nLyDzDhznoUf3fXqYd+D47tdZd7b9OI9DN3UqMDt7rJnl0qwap502gQtPW8T4mPZaNz4mLjxtEdA8\nQ22jQX+Xnn7c0Ddkd1q3q/T8RGE24NKqd2oVvYA0qz5Ju7M97+o9PaM8vqJ83a7Sc6AwG3CNqnfq\ntXMBybrQpwWgnREj0w22F7pdpedAYdZj7fZeqc/iWp/4r5MXkKynmUEdXDYInD22AGePtUHVKMPr\nxPhYW/X43ew22aj8tQTcs/rUjhzb2tdq9lg/UZj1ULMBaUUu+N1sE6ge57yrb2Nng5vOUegGO
woc\nKMx6KGsWurR+8tBfWVqrxx7lbrDDzoHCrIcOmRhvOMpZ0PBJ46IbtvH4jl19lzvJacaHmwOFjZR+\ny2QqNV6f1nLYKKj0S6Oxu8EOLwcKGxn9mMl0usGo5yJGIXeS9Y5HZtvIyJPJdN3mKZauXs8zV32B\npavX7x5pXLa0xt55B443zPRamzqjlf2YlcFPFDbUaquaWp0vuptPHmkDp6opMuqrycCNxtZ9PQsU\nkg4Avgrsn5TjcxFxoaRnAp8FngJsAt4WEU/0qpw2uJr18a+qvxvvZsK1Zo3AacfL286ybvMUF92w\nbXcbx7wDx7nwtEVuU7CW9PKJ4nFgWUQ8Imkc+LqkfwfeDVwWEZ+V9HHg7cDf97CcNqAaXfDrNbob\n73bCtbyNwHm3r5+lDuChR2dZ+bnWZ6qz0dazNoqoeCRZHE++AlgGfC5ZfwWwogfFsyGQdWEXlbvq\n/febw7lrt+zVDjFMM5NBJWDWz1IHMLszRmKmOWtfTxuzJY1J2gI8CHwZ+D4wHRE7kk1+DPh2xwpJ\nu7AvmJzgsjMX89jsLqZnZvdJj11kytB+lhUw3VvKWtHTQBEROyNiMXAE8GLgmFZ/VtI7JG2UtHH7\n9u0dK6MNrqwLfrN2iGGaQyHrSWhQn5Ksu/qi11NETEvaALwMmJS0X/JUcQTQsF9iRFwOXA6VpIBd\nK6wNlAPG5+wOCJMT41z0ukoD7rlrtzTcvnqHPUyDx1YuP3qfNgqoTEA0qE9J1l09e6KQNF/SZPJ6\nAjgFuBPYAJyRbHY28PnelNAGWbXHU+00no/v2LX7ddqddEBHx01Uy9aNMRpVK5YsYM0bj2dyYs8Y\njHkHjrPmjOOHJhhaZ/UszbikF1BprB6jErCujoiLJT2LSvfYQ4HNwFsj4vGsfTnNuNVbunp9w3kS\nFkxOcMuqZU27zrab6jtNJ9KKmxXV92nGI+J2YEmD9T+g0l5hVlizLq71k/3U69S4iW6O0TAri1N4\n2FBqpYvriiULuGXVMlLy8nWkR1C3x2iYlcGBwoZSsy6ute0Ec1JSuHaiR9CwjdGw0dB21ZOki5P9\nbAG2RMR/t10qs0SetOD1277hhQvYcNf2fX62vp2g0cxsnRo3kZbbyb2PrJ/lChSS3hoRn65dFxF/\nLekwYDHwu5KeExF/VGYhbTTlSc7XaNtrN001bCROS+0xJrEroq15KpoFNk/wY4Mo7xPF2yS9CHh3\nROz+T4uIB4Abky+zUuRp+E3b9ryr981nlNYesCuCe1afWri8rQa2YRqjYaMhs41C0iJJV9WsejUw\nA6yXNL+jJbORl6fhN23bnRGcf91WLli3teNtEnnmuzAbJM2eKP6DymhpACJiF7BK0unA1yR9kErb\nxB0R8Wjnimmj6PDJiYZdVxtd0NPmnobKxfqqW+/bPR9Fp9ok3KPJhlWzXk+/A1xSu0LSa4E/BJ4A\nTgD+FviRpO91pIQ2svIk50ube7oqa1ipgDe8sP3qIPdosmGV+UQREVuBt1SXJd0DfIfKfBFfrt1W\n0hEdKaGNrOqF+73/um13Ko7992t8b9PO3NMBbLirklgyTy+reu7RZMMqb2P2qyPirkZvRMSPSyiP\n2T4em92To2l6ZnZ3AzHs6T00R2pYpQSVJ4ZmiWrun55pewpU92iyYdWzXE9lcq6n4ZWWs2lyYpzH\nd+xqOoPd5MQ4iw4/mFu+/4vM7RYk1UONjjUm8YE3OYGeDZ++z/Vko6VolU5aQ3Baw3X9WAhg95Sf\nWU4+Zj5X3Xpfw/eqPaeg8fgNP0HYsHMKD+u4apXO1PTMPrPJNZO3Ibg6FuKWVcsAOO/q25jd2fyp\necNd2zOP1aibazufy2yQOFBYx7UzviCt59O8A8cbbl+92Fcv4mntFvWmpmcaVjvVqn+68bgJGxWF\nq54k/UZE/DRt2ayqnfEFaQ3EQGYPo7Q0He2of+LI+7lcT
WWDqp02ik8Ap2YsmwH5Bs41kpXyIu3C\nW/Ygt/E5+04bmvW56oPCycfM59pNU4V7VJn1UuGqp4g4NWvZrKpR9RHArx/f0VZ9fnU+icvOXAzA\nOWu38Ozzv8jCjDQdhTXYXVq12MnHzN+n7eKqW+9zNZUNrEKBQtIbJR2cvL5A0nWS9pmtzgwqF/RL\nTz9un3aF6ZlZzlm7hSUX31Q4YNQ2KMOe9BxpaTo+dObi1ImKsszujH0u6tXPtWByAlHpYnvp6cex\n4a7t+wSFtJYSp/ewQVD0ieKvIuJhSScBr6RS7fTx8oplw2bFkgUcOLdxTedDj84W7i3UrC1iTNrr\nIr5iyYLCKTUaXdSrTzXVnlYrlizIdfF3eg8bBEUDRfU/81Tg8oj4AjC3nCLZsMq6gBathml2Ud4Z\nwWVnLt59EYf0qrBm6i/qtbPkLV29fnegS7v41z/JOL2HDYqijdlTkv6BStLA90vaH3e1tSbSGn+r\nWrkTr28kzsoaW1XfaFzfk2rywHEeeWwHs7vSu9LWX9Sz0n2k5XxKm3HPrN8VDRRvAl4F/G1ETEt6\nOrCyvGLZMGp0Aa2VdideDQ5T0zN75W1qNu6hqtFkR7U9qZauXr876WCtrBnvssZQVAf7uSusDYui\ngWIGOAg4C7gYGAemyyqUDafqhfKiG7bt8xSQVg1Tf+deNDNZ1tNKkRnvmo2h8Cx2NkyKBoqPAbuA\nZVQCxcPAtcCLSiqXDZhWB5NVL6BZ29e+l5UVNo9DJhqP5Ib0KrE5Eus2TzX8HO2ODTEbJEUDxUsi\n4gRJmwEi4iFJbsweUUXSc6fdcdfvq4wgAfDrJ3akXvTTqsSykgF67gkbJUUboGcljZHUBCTzZ+/K\n/hEbVmXmPOpE6g1oPA6iqjoeYqzBIL20z5E2hsLVTTaMij5RfBi4HniapEuAM4ALSiuVDZQy54ru\n5AC0rH2vWLKAc9duyfVzboewUVEoUETEVZI2Aa+g0j18RUTcWWrJbGCUWV/frAttO5qVx+0OZo21\nk+vproj4aER8xEFitKXlPCpSX9/qYDhRmb1uTov5OFopT5mfw2yYFM31dIWkyZrleZI+WV6xbJDk\nra9PG9Fcv680CyYnuOzMxTy+YxcZY+Rytx+43cGssUJzZkvaHBFLmq3rFs+ZPTguWLeVq269b6/x\nEBPjYw0vyPU9oGq3rQ7AS7NgcmL3wDcza6zVObOLVj3NkTSv5mCH4vm3rYkL1m3l03VBAio9i85Z\nu4UL1m3dva46lmJmdufu3khztGfbZu0YJx8zv+zim42sohf3DwD/KemaZPmNwCXlFMmG0brNU3z6\n1vsyt6m+f+IzDm04liKrmqnehru2Fyuome2jaK+nK5NeTycnq06PiO+UVywrW6+n4Wx1TMVVt97H\nv3zzR20PtPM8D2blKVxdFBHbgG0llsU6pMjI6bK1euEOyhmNnbdLa68DqVk/y9VGIenryfeHJf2q\n5uthSb/Kua8jJW2Q9B1J2yS9K1l/qKQvS/pu8n1es31ZtjJHThc1eWB6rqVOyNOltXaWvOrUpUUn\nUjIbRrkCRUScJEnAooh4cs3XwRHx5JzH3gGcFxHHAi8F3inpWGAV8JWIeC7wlWTZ2lDmyOki1m2e\n4pHHdnTlWADzDhzP9TRQZiDN6vprNqhyVz1FREj6AnBcOweOiJ8AP0lePyzpTmAB8Hrg5clmVwA3\nA+9p51ijLm3E8SET4yxdvb7j1S1rbrw7c1KgMo2PiQtPW5TrZ8oKpP1QxWfWCUW7x35bUmkpxSUt\nBJYA3wQOS4IIwE+Bw1J+5h2SNkrauH27e7hkaTTieHyO+PUTO/aqbll5zW0sufimUu+G122eKjUl\nx9gcMZ4yHPuguWMcNHc/zl27JVf509oz8rZzpD2ZnHf1bX6ysIFWNFC8BLhV0vcl3S5pq6Tbi+xI\n0pOozGVxTkTs1c4Rl
dGADW9FI+LyiDgxIk6cP9995rM0GnH8pAP2Y3bn3qd2dlfw0KOzpdXTV++w\ny7RzVzB3vzl7fZYPnbmYD525mF0B0zP5y19W6o60J5BqunIHCxtURXs9LS/j4JLGqQSJqyLiumT1\nA5KeHhE/SaZYfbCMY426+kynz1z1haY/Uz+FaLOeQfXvP/rEjo6kDP/1Ezu55Hf3PvbS1etT2xla\nSd0B7U9dmpXQsNWymPWjooHiAeBPgZOo3PF/Hfj7PDtIGsU/AdwZER+seesG4GxgdfL98wXLaBla\nzdJavUtuVv/e6P0sotITqtFc1a2ov+i2285QRsrwZnOCe2yHDaqiVU9XAouA/wt8BDgW+FTOfSwF\n3gYsk7Ql+XoNlQBxiqTvAq9Mlq1kraa4qNbTN6t/zzvhUACPze5iXsFus/UX3bLaGdqRNQFSt8ti\nVqaiTxTPT7q1Vm2QlGtkdkR8ncqNZSOvKFgua1ErKS5q6+mb1b8XqWKamd3J/vvNYWJ8LPfP1190\n+2Vq0upTST+Uxaws7fR6eml1QdJLAKdvHSBZ1SCNUmxn3Q3XJu7L65czs03TitfvudFFt59ShPdT\nWczKUDTN+J3A0UA1y9tRwN1UBtFFRLygtBK2oNtpxoch3cPS1esbtiOMSXzgTce3lPK7Xv2TwfiY\nIMgcQ1GbDjwtrfgbXriADXdtH+jzbdaPWk0zXrTq6VUFf27gDcugqrSG12pVEuz9eaqvz7v6toa5\nmBYkF/D6AArsnjtCsM88FCcfM3+vQX8OCmb9p9ATRb/p5hNF2p34IE6Us27zVOaFv9HnyZpMqNkF\nvf5J7ORj5nPtpqlC+zKz9nV64qKR1eu8Sc3kyTW0YskCdqXcKKR9nqL1742q6zbctb3nyQrNrDnP\nSpdT2viDfuj6WKRarMjnyTvmIK1cHm9gNhj8RJFTWekeOqFIFtQyP0/a00xauTzewGww5HqikPTu\nrPfrRlgPpbLSPXRCkWqxsj7PBeu2clXNfNi1TzNZYzDqe0r1S9A1sz3yVj0dnHw/GngRlXQbAKcB\n3yqrUP2ujHQPnVC0Wqzdz7Nu89ReQaKq+jSTVq60nlL9eG7NRlmuQBER7wWQ9FXghIh4OFm+CGie\nZc46qlejk9fceHfjFL9UniYuO3Nxarn6Neia2R5FG7MPA56oWX6ClHkjrHt6VS2WVbV1+OREX1fX\nmVlzRQPFlcC3JF2fLK+gMhud9Vgv7tDTqpbEnrmr/eRgNrgK9XqKiEuA3wceSr5+PyL+T5kFs8HR\nqOeUgLe89KiGqUA8p7TZYCn0RJHMJXEscEhEXCzpKEkvjoiRadC2PVqtWhqW9Cdmo6ZoUsC/B3YB\nyyLieZLmATdFRGnzaOfR7aSAvVAd2Tw1PcOYxM6I3b2G2r3IdivJ4TClPzEbBp1OCviSiDhB0maA\niHhI0tyC+7Im6u/Eq7mZyrgjX7d5ipXX3LY7w+vU9Awrr7mtrX2m6ff0J2bWWNGR2bOSxkiSgUqa\nT+UJwzoga/a4dnMjXXTDtn3SgM/uCi66YVvhfabph1nozCy/ok8UHwauB54m6RLgDOCvSivVkMtb\n1dPsjjvvHXnt8dMqHqdnis1lnSVrnMcwzPFhNqwKBYqIuErSJipTlgpYERF3llqyIVVm4r7a91s9\n9kU3bCs9CLR6kU9r9AbcyG3Wx4r2enp/RLwHuKvBOsuQlbgv7aKYNskQpI+8bmXuhyzzDhxvabu8\nga/ReIqlq9fnPidm1j1Fq55OAeqDwqsbrBtJWXfY7Sbuq+/1dPIx81lz492cu3ZL5h16o1xMacbH\nxIWnLWpp2yKBr54buc36W97ssX8C/CnwbEm317x1MPCNMgs2qJrdYZeZuC/tWAeMz9nn4t0sSIxJ\n7IrI3T5QxkW+n+f4MLP8vZ4+QyVT7OeT79WvF0bEW0ou20BqNidEmfM/pB3roUfztUF
MjI/xgTcd\nzz2rT+WWVctyVfeU0ZOpn+f4MLOcgSIifhkRP6SSBPCXEXFvRNwLhKRPdqKAg6bZHXazqUTzpLgo\no2rmoLntzVFdxkW+6PSqZtYdRdsoXhAR09WFZMDdkpLKNNBaqUZJS5CXt2E47ViTE+P8+okdzO5s\n3ioxeeDcti7IZWWGddJAs/5VdMDdnCRtBwCSDsXzbwPt3WHnnco07VgXvW4RB81t7deR56kk7Wln\nxZIF3LJqWaGqKzPrf0Uv7h8A/lPSNcnyG4FLyinSYGvnDjtvw3DWsc5du6Wl8uYZg+GxDmajqeiA\nuyuTAXcnJ6tOj4jvlFeswVa0GqVI75+0YzUbpAf52hLK6AZrZoOpaNUTEbEtIj6SfA18kOiHeRLK\n7P2zcvnRjM9R6vt5G4w91sFsdOUdR/H1iDhJ0sPs3TVfQETEk0stXZf0S7VKmVOGVn+mNmXHvAPH\nufC0RV172jGz4VBoPop+0+58FKMwT0K7SffqgylUnnbcjdVscHVkPgpJ7856PyI+mGd//WLYq1XK\neGIq82nHzAZL3sbsg5PvRwMvAm5Ilk8DBnYa1GGvVimrIdpjHcxGU96R2e+NiPcCRwAnRMR5EXEe\n8ELgqE4UsBuGPYXEsD8xmVlnFe31dBiVNB5VTyTrcpH0SUkPSrqjZt2hkr4s6bvJ93lZ+yjDsKeQ\n8MxyZtaOogPurgS+Jen6ZHkFcEWB/fwz8JFkf1WrgK9ExGpJq5LljqcvH+ZqlayZ5czMmik64O4S\nSf8O/Fay6vcjYnOB/XxV0sK61a8HXp68vgK4Gc9z0RY3RJtZO4rOcCfgWOCQiLhY0lGSXhwRZTRo\nHxYRP0le/5QCVVq2r24+MXn+a7PhUrSN4mPAy4CzkuWHgY+WUqIaURnk0XCgh6R3SNooaeP27dvL\nPrQVVO2KOzU9Q7CnK24vRrqbWTmKBoqXRMQ7gcegkmYcmFtSmR6Q9HSA5PuDjTaKiMsj4sSIOHH+\n/PklHdralTcDrpn1v6KBYlbSGMndvqT5wK6SynQDcHby+mwqs+nZgHBXXLPhU7TX04eB64GnSboE\nOAO4IO9OJP0LlYbrp0r6MXAhsBq4WtLbgXuBNxUs41Dq9/r/YR+8aDaKcgeKpCH7q8Am4BVUEgKu\niIg78+4rIs5KeesVefc1CvoleWEWd8U1Gz65A0VEhKQvRsRxwF0dKFPf69VdfZ5UHL0qo7vimg2f\nolVP35b0ooj4r1JLMwB6eVffav1/r588hnnwotkoKtzrCbhV0vcl3S5pq6TbyyxYv+plr55WU3G4\n55GZlanoE8XyUksxQHrZq6fV+n/3PDKzMuWdj+IA4I+B5wBbgU9ExI5OFKxf9bJXT6v1/+55ZGZl\nyvtEcQUwC3wNeDWVNB7vKrtu6ZQDAAAJq0lEQVRQ/azVu/pONSa3Uv/vnkdmVqa8geLYpLcTkj7B\nAE9WVFQrd/X90JjcrIxmZq3KNWe2pG9HxAlpy73S7pzZZRuFObjNbPB1ZM5s4HhJv6oeA5hIlkVl\niMWTc+6vZzo5zsCNyWY2THIFiogYa75V/+t01ZAbk81smBQdRzHQOj3OYNjn4Daz0VJ0HMVAK6tq\nKK36yo3JZjZMRjJQlFE11Kz6ymkszGxYjGTVUxlVQ06TYWajYiSfKMqoGnLPJjMbFSMZKKD9DKfu\n2WRmo2Ikq57K4J5NZjYqRvaJol3u2WRmo8KBog3u2WRmo8BVT2ZmlsmBwszMMjlQmJlZJrdRlKiT\nGWnNzHrFgaIkvZ6syMysU1z1VBKn9DCzYeVAURKn9DCzYeVAUZK01B1O6WFmg86BoiRO6WFmw8qN\n2SVxSg8zG1YOFCVySg8zG0auejIzs0wOFGZmlsmBwszMMjlQmJlZJgcKMzPL1JeBQtKrJN0t6XuS\nVvW6PGZmo6zvAoWkMeCjwKuBY4GzJB3b21KZmY2
uvgsUwIuB70XEDyLiCeCzwOt7XCYzs5HVj4Fi\nAfCjmuUfJ+v2IukdkjZK2rh9+/auFc7MbNT0Y6BoSURcHhEnRsSJ8+fP73VxzMyGVj8GiingyJrl\nI5J1ZmbWA/0YKP4LeK6kZ0qaC7wZuKHHZTIzG1l9lxQwInZI+jPgRmAM+GREbOtxsczMRlbfBQqA\niPgi8MVel8PMzPqz6snMzPqIA4WZmWVyoDAzs0wOFGZmlsmBwszMMjlQmJlZJgcKMzPL5EBhZmaZ\nHCjMzCyTA4WZmWVyoDAzs0wOFGZmlqkvkwL2s3Wbp1hz493cPz3D4ZMTrFx+NCuW7DMBn5nZ0HCg\nyGHd5inOv24rM7M7AZianuH867YCOFiY2dBy1VMOa268e3eQqJqZ3cmaG+/uUYnMzDrPgSKH+6dn\ncq03MxsGDhQ5HD45kWu9mdkwcKDIYeXyo5kYH9tr3cT4GCuXH92jEpmZdZ4bs3OoNli715OZjRIH\nipxWLFngwGBmI8VVT2ZmlsmBwszMMjlQmJlZJgcKMzPL5EBhZmaZFBG9LkPbJG0H7u11Odr0VOBn\nvS5EH/H52MPnYm8+H3tr53w8IyLmN9toKALFMJC0MSJO7HU5+oXPxx4+F3vz+dhbN86Hq57MzCyT\nA4WZmWVyoOgfl/e6AH3G52MPn4u9+XzsrePnw20UZmaWyU8UZmaWyYHCzMwyOVD0gKRPSnpQ0h01\n6w6V9GVJ302+z+tlGbtF0pGSNkj6jqRtkt6VrB/V83GApG9Jui05H+9N1j9T0jclfU/SWklze13W\nbpE0JmmzpH9Llkf5XPxQ0lZJWyRtTNZ1/H/FgaI3/hl4Vd26VcBXIuK5wFeS5VGwAzgvIo4FXgq8\nU9KxjO75eBxYFhHHA4uBV0l6KfB+4LKIeA7wEPD2Hpax294F3FmzPMrnAuDkiFhcM3ai4/8rDhQ9\nEBFfBX5Rt/r1wBXJ6yuAFV0tVI9ExE8i4tvJ64epXBAWMLrnIyLikWRxPPkKYBnwuWT9yJwPSUcA\npwL/mCyLET0XGTr+v+JA0T8Oi4ifJK9/ChzWy8L0gqSFwBLgm4zw+UiqWrYADwJfBr4PTEfEjmST\nH1MJpqPgQ8BfAruS5acwuucCKjcNN0naJOkdybqO/694hrs+FBEhaaT6LUt6EnAtcE5E/Kpy41gx\naucjInYCiyVNAtcDx/S4SD0h6bXAgxGxSdLLe12ePnFSRExJehrwZUl31b7Zqf8VP1H0jwckPR0g\n+f5gj8vTNZLGqQSJqyLiumT1yJ6PqoiYBjYALwMmJVVv7I4ApnpWsO5ZCrxO0g+Bz1Kpcvo7RvNc\nABARU8n3B6ncRLyYLvyvOFD0jxuAs5PXZwOf72FZuiapc/4EcGdEfLDmrVE9H/OTJwkkTQCnUGm3\n2QCckWw2EucjIs6PiCMiYiHwZmB9RLyFETwXAJIOknRw9TXwO8AddOF/xSOze0DSvwAvp5Ie+AHg\nQmAdcDVwFJWU6W+KiPoG76Ej6STga8BW9tRD/y8q7RSjeD5eQKVBcozKjdzVEXGxpGdRuas+FNgM\nvDUiHu9dSbsrqXr6i4h47aiei+RzX58s7gd8JiIukfQUOvy/4kBhZmaZXPVkZmaZHCjMzCyTA4WZ\nmWVyoDAzs0wOFGZmlsmBwszMMjlQ2NCQtEJSSMpMeSFpUtKftnmsR1LW70xSQN8h6RpJB6Zs9412\njt8qSc9LUlPPSZbHJN0k6fe6cXwbDg4UNkzOAr6efM8yCbQVKDLMJCmgnw88Afxx7ZuqmBMRv9mh\n4+8lIu6kMrL7tcmqS4C7I+LKbhzfhoMDhQ2FJKngSVTmJnhzzfrfk3R7MhHQp5LVq4FnJ3f+ayQt\nrJtE6i8kXZS8Xpdk6txWk62zVV8DnpPs/25JV1JJuXBk7RNJShmR9NZkEqMtkv4heRo4SNIXkm3v\nkHRmC+W4DPg
TSW+gkj/p3Tk/h404Z4+1YfF64EsR8d+Sfi7phcBjwAXAb0bEzyQdmmy7Cnh+RCyG\n3enN0/xBRPwiybv0X5KujYifNytMkrTu1cCXklXPBc6OiFuT96vbLWpURknPA84ElkbErKSPAW8B\nfg3cHxGnJtsdknz/IvCHEXF/fVki4iZJHwAuBX47Imabld+slp8obFicRSX/D8n3s6hkG70mIn4G\nUDD/zZ9Lug24FTiSygU/y0Qyl8RG4D4qCQ8B7q0GiTppZXwF8EIqwWlLsvwsKjmxTpH0fkm/FRG/\nTH7uNY2CRI1vAB+MiJ9WV0j6myafxQzwE4UNgeQufBlwXJKLf4zKBC9rWtzFDva+aTog2e/LgVcC\nL4uIRyXdXH0vw0z1SaWmfFB5EshDwBURcf4+b0gnAK8B3ifpKxFxcQv7Oxb4p5p9/AaV2fPMmvIT\nhQ2DM4BPRcQzImJhRBwJ3APcDrwxya5JTdXTw8DBNT//APA0SU+RtD97Gn4PAR5KgsQxVOb0Ltv6\nlDJ+BTgjmaAGSYdKeoakw4FHI+LTVALhCS0eZxGV9pGqxcCWMj6ADT8HChsGZ7En/XLVtVQatS8B\n/l9SffRBgKSN4ZakMXhNUmd/MfAtKlOPVmcN+xKwn6Q7qTSAN6o6aktEbEsp43eotF3cJOn2pFxP\nB44DvpVUR10IvA8qbRRJENmHpCOpTB9a26XXgcJa5jTjZiNI0ieAP4qIXU03tpHnQGFmZplc9WRm\nZpkcKMzMLJMDhZmZZXKgMDOzTA4UZmaWyYHCzMwyOVCYmVkmBwozM8vkQGFmZpn+P0oQ58T6BoBj\nAAAAAElFTkSuQmCC\n",
+            "text/plain": [
+              "<Figure size 432x288 with 1 Axes>"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2X1RA6sgtZQ6",
+        "colab_type": "text"
+      },
+      "source": [
+        "## Conclusion\n",
+        "- looks like the multiple regression we ran does provide more accurate predictions than the simple linear regression\n",
+        "  - this will not always be the case, so always be sure to check and confirm if the extra computing is worth it\n",
+        "\n",
+        "Anyway, that's how you implement both Simple and Multiple Linear Regression with `cuML`. Go forth and do great things. Thanks for stopping by!"
+      ]
+    }
+  ]
+}

From cd659b32ff7e34f183e7d8be617ef563c93dac8a Mon Sep 17 00:00:00 2001
From: Winston <43570913+gumdropsteve@users.noreply.github.com>
Date: Fri, 4 Oct 2019 00:04:09 -0700
Subject: [PATCH 6/7] Delete linear_regression_boston_demo.ipynb

Incorrect branch, find in patch-3
---
 .../linear_regression_boston_demo.ipynb       | 768 ------------------
 1 file changed, 768 deletions(-)
 delete mode 100644 intermediate_notebooks/examples/linear_regression_boston_demo.ipynb

diff --git a/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb b/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb
deleted file mode 100644
index 53b868d2..00000000
--- a/intermediate_notebooks/examples/linear_regression_boston_demo.ipynb
+++ /dev/null
@@ -1,768 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "LOCAL_intro_lin_reg_cuml",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "accelerator": "GPU"
-  },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "2tZ3RLnlkrkg",
-        "colab_type": "text"
-      },
-      "source": [
-        "# Intro to  Linear Regression with cuML\n",
-        "Corresponding notebook to [*Beginner’s Guide to Linear Regression in Python with cuML*](https://medium.com/future-vision/beginners-guide-to-linear-regression-in-python-with-cuml-30e2709c761) story on Medium\n",
-        "\n",
-        "Linear Regression is a simple machine learning model where the response `y` is modelled by a linear combination of the predictors in `X`. The `LinearRegression` function implemented in the `cuML` library allows users to change the `fit_intercept`, `normalize`, and `algorithm` parameters. \n",
-        "\n",
-        "Here is a brief on RAPIDS' Linear Regression parameters:\n",
-        "\n",
-        "- `algorithm`: 'eig' or 'svd' (default = 'eig')\n",
-        "    - `Eig` uses a eigendecomposition of the covariance matrix, and is much faster\n",
-        "    - `SVD` is slower, but guaranteed to be stable\n",
-        "- `fit_intercept`: boolean (default = True)\n",
-        "  - If `True`, `LinearRegresssion` tries to correct for the global mean of `y`\n",
-        "  - If `False`, the model expects that you have centered the data.\n",
-        "- `normalize`: boolean (default = False)\n",
-        "  - If True, the predictors in X will be normalized by dividing by it’s L2 norm\n",
-        "  - If False, no scaling will be done\n",
-        "\n",
-        "Methods that can be used with `LinearRegression` are:\n",
-        "\n",
-        "- `fit`: Fit the model with `X` and `y`\n",
-        "- `get_params`: Sklearn style return parameter state\n",
-        "- `predict`: Predicts the `y` for `X`\n",
-        "- `set_params`: Sklearn style set parameter state to dictionary of params\n",
-        "\n",
-        "`cuML`'s `LinearRegression` expects expects either `cuDF` DataFrame or `NumPy` matrix inputs\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-tG6ezqKh1Z0",
-        "colab_type": "text"
-      },
-      "source": [
-        "Note: `CuPy` is not installed by default with RAPIDS `Conda` or `Docker` packages, but is needed for visualizing results in this notebook.\n",
-        "- install with `pip` via the cell below "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "pxBcXor_0-Jd",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# install cupy\n",
-        "!pip install cupy"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "N20le3_KlP3O",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Load data\n",
-        "- for this demo, we will be utilizing the Boston housing dataset from `sklearn`\n",
-        "  - start by loading in the set and printing a map of the contents"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "RFE-nxxlTajg",
-        "colab_type": "code",
-        "outputId": "04f89e88-61a3-4dd2-9088-123b410e508c",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "from sklearn.datasets import load_boston\n",
-        "\n",
-        "# load Boston dataset\n",
-        "boston = load_boston()\n",
-        "\n",
-        "# let's see what's inside\n",
-        "print(boston.keys())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "wmcO8dxO0uOB",
-        "colab_type": "text"
-      },
-      "source": [
-        "#### Boston house prices dataset\n",
-        "- a description of the dataset is provided in `DESCR`\n",
-        "  - let's explore "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "c3kLHAsP-Al2",
-        "colab_type": "code",
-        "outputId": "02518c3c-7767-42a7-b6f4-6756ace741cc",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 923
-        }
-      },
-      "source": [
-        "# what do we know about this dataset?\n",
-        "print(boston.DESCR)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            ".. _boston_dataset:\n",
-            "\n",
-            "Boston house prices dataset\n",
-            "---------------------------\n",
-            "\n",
-            "**Data Set Characteristics:**  \n",
-            "\n",
-            "    :Number of Instances: 506 \n",
-            "\n",
-            "    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.\n",
-            "\n",
-            "    :Attribute Information (in order):\n",
-            "        - CRIM     per capita crime rate by town\n",
-            "        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\n",
-            "        - INDUS    proportion of non-retail business acres per town\n",
-            "        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n",
-            "        - NOX      nitric oxides concentration (parts per 10 million)\n",
-            "        - RM       average number of rooms per dwelling\n",
-            "        - AGE      proportion of owner-occupied units built prior to 1940\n",
-            "        - DIS      weighted distances to five Boston employment centres\n",
-            "        - RAD      index of accessibility to radial highways\n",
-            "        - TAX      full-value property-tax rate per $10,000\n",
-            "        - PTRATIO  pupil-teacher ratio by town\n",
-            "        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\n",
-            "        - LSTAT    % lower status of the population\n",
-            "        - MEDV     Median value of owner-occupied homes in $1000's\n",
-            "\n",
-            "    :Missing Attribute Values: None\n",
-            "\n",
-            "    :Creator: Harrison, D. and Rubinfeld, D.L.\n",
-            "\n",
-            "This is a copy of UCI ML housing dataset.\n",
-            "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/\n",
-            "\n",
-            "\n",
-            "This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n",
-            "\n",
-            "The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\n",
-            "prices and the demand for clean air', J. Environ. Economics & Management,\n",
-            "vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics\n",
-            "...', Wiley, 1980.   N.B. Various transformations are used in the table on\n",
-            "pages 244-261 of the latter.\n",
-            "\n",
-            "The Boston house-price data has been used in many machine learning papers that address regression\n",
-            "problems.   \n",
-            "     \n",
-            ".. topic:: References\n",
-            "\n",
-            "   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n",
-            "   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n",
-            "\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "wI_sB78vE297",
-        "colab_type": "text"
-      },
-      "source": [
-        "### Build Dataframe\n",
-        "- Import `cuDF` and input the data into a DataFrame \n",
-        "  - Then add a `PRICE` column equal to the `target` key"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "xiMmIZ8O5scJ",
-        "colab_type": "code",
-        "outputId": "fd09db1f-fb41-4494-bb8b-eab6e18c258f",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 206
-        }
-      },
-      "source": [
-        "import cudf\n",
-        "\n",
-        "# build dataframe from data key\n",
-        "bos = cudf.DataFrame(list(boston.data))\n",
-        "# set column names to feature_names\n",
-        "bos.columns = boston.feature_names\n",
-        "\n",
-        "# add PRICE column from target\n",
-        "bos['PRICE'] = boston.target\n",
-        "\n",
-        "# let's see what we're working with\n",
-        "bos.head()"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>CRIM</th>\n",
-              "      <th>ZN</th>\n",
-              "      <th>INDUS</th>\n",
-              "      <th>CHAS</th>\n",
-              "      <th>NOX</th>\n",
-              "      <th>RM</th>\n",
-              "      <th>AGE</th>\n",
-              "      <th>DIS</th>\n",
-              "      <th>RAD</th>\n",
-              "      <th>TAX</th>\n",
-              "      <th>PTRATIO</th>\n",
-              "      <th>B</th>\n",
-              "      <th>LSTAT</th>\n",
-              "      <th>PRICE</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>0.00632</td>\n",
-              "      <td>18.0</td>\n",
-              "      <td>2.31</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.538</td>\n",
-              "      <td>6.575</td>\n",
-              "      <td>65.2</td>\n",
-              "      <td>4.0900</td>\n",
-              "      <td>1.0</td>\n",
-              "      <td>296.0</td>\n",
-              "      <td>15.3</td>\n",
-              "      <td>396.90</td>\n",
-              "      <td>4.98</td>\n",
-              "      <td>24.0</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>0.02731</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>7.07</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.469</td>\n",
-              "      <td>6.421</td>\n",
-              "      <td>78.9</td>\n",
-              "      <td>4.9671</td>\n",
-              "      <td>2.0</td>\n",
-              "      <td>242.0</td>\n",
-              "      <td>17.8</td>\n",
-              "      <td>396.90</td>\n",
-              "      <td>9.14</td>\n",
-              "      <td>21.6</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>0.02729</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>7.07</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.469</td>\n",
-              "      <td>7.185</td>\n",
-              "      <td>61.1</td>\n",
-              "      <td>4.9671</td>\n",
-              "      <td>2.0</td>\n",
-              "      <td>242.0</td>\n",
-              "      <td>17.8</td>\n",
-              "      <td>392.83</td>\n",
-              "      <td>4.03</td>\n",
-              "      <td>34.7</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>0.03237</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>2.18</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.458</td>\n",
-              "      <td>6.998</td>\n",
-              "      <td>45.8</td>\n",
-              "      <td>6.0622</td>\n",
-              "      <td>3.0</td>\n",
-              "      <td>222.0</td>\n",
-              "      <td>18.7</td>\n",
-              "      <td>394.63</td>\n",
-              "      <td>2.94</td>\n",
-              "      <td>33.4</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>0.06905</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>2.18</td>\n",
-              "      <td>0.0</td>\n",
-              "      <td>0.458</td>\n",
-              "      <td>7.147</td>\n",
-              "      <td>54.2</td>\n",
-              "      <td>6.0622</td>\n",
-              "      <td>3.0</td>\n",
-              "      <td>222.0</td>\n",
-              "      <td>18.7</td>\n",
-              "      <td>396.90</td>\n",
-              "      <td>5.33</td>\n",
-              "      <td>36.2</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "</div>"
-            ],
-            "text/plain": [
-              "      CRIM    ZN  INDUS  CHAS    NOX  ...    TAX  PTRATIO       B  LSTAT  PRICE\n",
-              "0  0.00632  18.0   2.31   0.0  0.538  ...  296.0     15.3  396.90   4.98   24.0\n",
-              "1  0.02731   0.0   7.07   0.0  0.469  ...  242.0     17.8  396.90   9.14   21.6\n",
-              "2  0.02729   0.0   7.07   0.0  0.469  ...  242.0     17.8  392.83   4.03   34.7\n",
-              "3  0.03237   0.0   2.18   0.0  0.458  ...  222.0     18.7  394.63   2.94   33.4\n",
-              "4  0.06905   0.0   2.18   0.0  0.458  ...  222.0     18.7  396.90   5.33   36.2\n",
-              "\n",
-              "[5 rows x 14 columns]"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          },
-          "execution_count": 5
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "r2qrTxo4ljZp",
-        "colab_type": "text"
-      },
-      "source": [
-        "### Split Train from Test\n",
-        "- For basic Linear Regression, we will predict `PRICE` (Median value of owner-occupied homes) based on `TAX` (full-value property-tax rate per $10,000)\n",
-        "  - Go ahead and trim data to just these columns"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "spaDB10E3okF",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# simple linear regression X and Y\n",
-        "X = bos['TAX']\n",
-        "Y = bos['PRICE']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "4TKLv8FjIBuI",
-        "colab_type": "text"
-      },
-      "source": [
-        "We can now set training and testing sets for our model\n",
-        "- Use `cuML`'s `train_test_split` to do this\n",
-        "  - Train on 70% of data\n",
-        "  - Test on 30% of data"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "1DC6FHsNIKH_",
-        "colab_type": "code",
-        "outputId": "4c932268-7a82-4ac3-c7b9-9966ffc2b12e",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 86
-        }
-      },
-      "source": [
-        "from cuml.preprocessing.model_selection import train_test_split\n",
-        "\n",
-        "# train/test split (70:30)\n",
-        "sX_train, sX_test, sY_train, sY_test = train_test_split(X, Y, train_size = 0.7)\n",
-        "\n",
-        "# see what it looks like\n",
-        "print(sX_train.shape)\n",
-        "print(sX_test.shape)\n",
-        "print(sY_train.shape)\n",
-        "print(sY_test.shape)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "(354,)\n",
-            "(152,)\n",
-            "(354,)\n",
-            "(152,)\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ZLVg44gAmJG7",
-        "colab_type": "text"
-      },
-      "source": [
-        "### Predict Values\n",
-        "1. fit the model with `TAX` (*sX_train*) and corresponding `PRICE` (*sY_train*) values \n",
-        "  - so it can build an understanding of their relationship \n",
-        "2. predict `PRICE` (*sY_pred*) for a test set of `TAX` (*sX_test*) values\n",
-        "  - and compare `PRICE` predictions to actual median house price (*sY_test*) values\n",
-        "    - use `sklearn`'s `mean_squared_error` to do this"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ZGMPloJxGtK3",
-        "colab_type": "code",
-        "outputId": "664b54fe-16d5-4140-a657-3dc782574da9",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "from cuml import LinearRegression\n",
-        "from sklearn.metrics import mean_squared_error\n",
-        "\n",
-        "# call Linear Regression model\n",
-        "slr = LinearRegression()\n",
-        "\n",
-        "# train the model\n",
-        "slr.fit(sX_train, sY_train)\n",
-        "\n",
-        "# make predictions for test X values\n",
-        "sY_pred = slr.predict(sX_test)\n",
-        "\n",
-        "# calculate error\n",
-        "mse = mean_squared_error(sY_test, sY_pred)\n",
-        "print(mse)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "54.32312606491228\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "T7BXjkPSGwqd",
-        "colab_type": "text"
-      },
-      "source": [
-        "3. visualize prediction accuracy with `matplotlib`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "pp9RNPt_Iemk",
-        "colab_type": "code",
-        "outputId": "22a22472-50ad-4bb3-d104-35e9e100b8b6",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 305
-        }
-      },
-      "source": [
-        "import cupy\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "# scatter actual and predicted results\n",
-        "plt.scatter(sY_test, sY_pred)\n",
-        "\n",
-        "# label graph\n",
-        "plt.xlabel(\"Actual Prices: $Y_i$\")\n",
-        "plt.ylabel(\"Predicted prices: $\\hat{Y}_i$\")\n",
-        "plt.title(\"Prices vs Predicted prices: $Y_i$ vs $\\hat{Y}_i$\")\n",
-        "\n",
-        "plt.show()"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEgCAYAAACq+TSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu4XHV97/H3J5sNbi5lQwkom8Qo\nWiiIEAiCgqeAF1REU7yVI5ba1jxaTytesGCtBCsVjULxVJ82p1DFogeRkNLjBaliFRVoQoIhBGqV\n6wa5B4JsYCf5nj9mzWb2ZGbNzJo1M2tmPq/nyZOZNWvW+s6a2eu71u+qiMDMzKyeOb0OwMzMis2J\nwszMUjlRmJlZKicKMzNL5URhZmapnCjMzCyVE4WZmaVyojCzgSXpDZLe0Os4+p3c4c7MBpGkPYDv\nJU9fExEP9zKefuZEYWYDSdIXgSuAEeBNEfH+HofUt5wozMwslesozMwslROFIWm9pGN6HUdRSPqy\npE8lj7tybCr3mdP2/J1abpwoBpCkOyRNSXpC0v3JSWjneutHxIER8cMuhtiWVj9fO5o9NklMr+5E\nDFn08juVtJukTdWJStJXJa2QpEHc9yBzohhcJ0bEzsChwCLg49UrSNqu61HlZ9A/XyZF+MwR8Siw\nHDitvEzSXwO/C5wSHawY7eW+B5kTxYCLiEngO8BLYObK9y8l/Rz4jaTtKq+GJc1LrrwelPSwpL8v\nb0vS3pIuT167XdJfVLz2l5Imk6u52yS9qjqWZJ1vVi27QNIXmt1Gxs9XN+7kPQsl3Zjs91LgORWv\nzbpTqHV8JH0VmA/8W3KX89EmjlfdfdaSxHGmpFskPSrpnyU9J+UzN4y7UYxZv5PEecDxkl4o6W3A\nEkotj55M+Yx5/T5a3rc1EBH+N2D/gDuAVyeP5wHrgb+peG1tsnyscn1KzQhvAs4HdqJ08jo6WWcO\nsBr4BLA98ELgV8DxwH7A3cDeyboLgH1rxPV84Elgl+T5CHAfcGSz22j186XFnay/PXAn8EFgFHgr\nMA18qsa+0o7PzHpNHK/UfaZ85puTz7U78JOqGGt+p2lxN3Fs6n4nwJeALzX4HV4EfBd4EDi0id9t\nLr+PLPv2vwbfTa8D8L8OfKmlk8QTwMbkhPSlqhPIH9dY/9XAy5M/rO1qbPMI4K6qZWcC/wy8CHgg\n2cZog9iuBf4wefwa4JfJ41a20fTnS4s7efw/gHtJmoony35K7USRdnxm1mvieKXuM+Uzv7fi+Rsq\njl3d7zQt7iaOTdPfSZ2YXwIE8Paq5f8LeHGnfh9Z9+1/9f/1vDzTOmZxRPx7ndfurrN8HnBnRGyu\n8drzgb0lbaxYNgL8OCL+W9JpwFLgQElXAR+KiHtrbOdrwMnAxcD/TJ7T4jZa+Xx1404e7w1MRnIW\nSdxZZ7tpx6da2n5b2Welys91Z7KdWq9Vqxd36rHJ8J1U2x54GlhRuTAi/r726kB+v48s+7Y6XEcx\nnOpV6N0NzK9TIXo3cHtEjFf82yUi3gAQEV+LiKMpnXwC+EydfVwGHCNpH+D3SU4ELW6jkcrPlxo3\npaKNiarWMPPrbDft+FQf07T9trLPSvOq1q88SaZV0taLu9Gxafc7ORi4uTpBSfphynvy+n1k2bfV\n4URhlW6gdBI7V9JOkp4j6aiK1zYlFYpjkkYkvUTS4ZL2k3ScpB2Ap4ApYGutHUTEg8APKRXB3B4R\nGwBa2UaGz1Qz7uT1nwGbgb+QNCrpJOBlKduqd3zup1TG38x+W9lnpfdL2kfS7sBfAZe2cAxqxZ16\nbHL4Tg6hVHcyQ6Xxlx6o94Ycfx8t79vqc6KwGRGxBTiRUnnwXcA9wDsqXnsjpT/A24GHgH8CdgV2\nAM5Nlv0a2JNSWXc9X6NU1vy
1imWtbqOVz1QvbiLiGeAk4I+ARyh93hUp26p5fIBPAx+XtFHSR9L2\n28o+q3yN0iB3vwJ+CTTVQa9e3I2ODSnfiaR/kPQPDXZ9MFUna+ClwLoG78vj95F131aDx3oy6wOS\n7gD+NKVepi8k9Qx3RMTKYdp3v/MdhZl100HAz4dw333NrZ7MrGsi4k+Gcd/9zkVPZmaWykVPZmaW\nyonCzMxSDUQdxR577BELFizodRhmZn1l9erVD0XE3EbrDUSiWLBgAatWrep1GGZmfUVSM0PHuOjJ\nzMzSOVGYmVkqJwozM0vVs0SRzLh1TTJj13pJH0iWHyLpOklrJa2S1MxgaWZm1iG9rMzeDHw4Im6U\ntAuwWtLVwGeBsyPiO5LekDw/podxmpkNtZ4lioi4j9LQx0TEJkkbgAlK48z/VrLarswec9/63Mo1\nkyy76jbu3TjF3uNjnH78fixeOJF5PTPrvEI0j5W0AFgIXA+cBlwl6XOUisZe0bvILE8r10xy5op1\nTE1vAWBy4xRnriiN+lyZBJpdz8y6o+eJQtLOwOXAaRHxuKRPAR+MiMslvR24kNLY9NXvWwIsAZg/\nv5nJwazXll1128zJv2xqegvLrrptVgJodj0rhrS7v1qvAS3fLfoOs7d6migkjVJKEpdERHnillOB\nDySPL6M0kco2ImI5sBxg0aJFHtmwD9y7caqp5c2uZ72XdvcHbPPa6ZfdBILpLbHN+vVO/L7D7L1e\ntnoSpbuFDRFxXsVL9wK/lzw+DvhFt2Ozzth7fKyp5c2uZ72XdvdX67XprTGTJKrXz7IP645e9qM4\nCngXcFzSFHZt0srpPcDnJd0E/C1J8ZL1v9OP34+x0ZFZy8ZGR2aKI1pdz3ov7e6vlTvAtHV9h9l7\nvWz1dC2gOi8f1s1YrDvKxQSNypqbXc96b+/xMSZrnLDLd3+1Xqu3naz7sM7reWW2DZfFCyeaOuE3\nu5711unH7zer/gBm3/1VvzY6R7PqKKrXz7IP6zwnCjPLrJm7v3ZbPfkOs/cGYirURYsWhYcZNzNr\njaTVEbGo0Xq+ozCzprk/w3ByojCzprg/w/ByojDrkiJdjWeJxT3mh5cThXVdkU6Y3YqpSFfjWWOp\n19TV/RkGnycusq4qn6QmN04RPHuSWrlmcqBjKlLv4iyxrFwzWbfTk/szDD4nCuuqIp0wy7oRU5F6\nF2eJZdlVt1GrfaTA/RmGgBOFdVWRTpiN9p1nTEUavypLLPWOReCK7GHgRGFdVaQTZqN95xlTkcav\nyhJLvWMx4WKnoeBEYV1VpBNmWTdiWrxwgk+fdBAT42OI0gn20ycd1JOr8SyxFPF7s+5xz2zrumFs\n9TQIfIwGT7M9s50orLB6eWLySdHHYBh4CA/ra73sd5D3vqtPuMfuP5drbn2w0CfgPI9BPyacfoy5\nk3xHYYV01Lk/qNnBa2J8jJ+ccVzb2087EbS670ZzRlcPkV1tbHSkZ/UV9eR1/Gt9/iJ+3kr9GHNW\nzd5RuDLbCqmTTVYbdbBrZd+NtlWrj0a1XvcjqSWv41/EfjON9GPMneZEYYXUySarjU4Erey70baa\nPbEWbRiMvI5/EfvNNNKPMXeaE4UVUiebYzY6EbSy70bbavbEWrRhMPI6/kXsN9NIP8bcaU4UVkid\n7HfQ6ETQyr4bbavWCbdaEfsj5HX8+7H/RT/G3GmuzLahk2dlZTPb6sdWT3nqxxZE/RhzFu5HYZYi\nzxNBN08qw3ICs+5wojAbMLXuXkRpYL4JJw3LoPDNYyXNk3SNpFskrZf0gYrX/lzSrcnyz/YqRrMi\nqdXCqnyZV4R5PWxw9bJn9mbgwxFxo6RdgNWSrgb2At4MHBwRT0vas4cxWh8btGKaRs0zPS2pdUrP\nEkVE3AfclzzeJGkDMAG8Bzg3Ip5OXnugVzEW1aCdADuhSFOP5mXv8bG605GWDXNbf+ucQoz1J
GkB\nsBC4HlgGvFLSOcBTwEci4j97F12xDOIJsBPqdYQ77dK1LL1yPRJsfHK6bqLtRDJud5unH79fw+FA\nhrmtf158IbatnicKSTsDlwOnRcTjkrYDdgeOBA4HviHphVFV6y5pCbAEYP78+V2OunfSegIP+4+5\nUtqV9cap6ZnH5US76s5HZpqsju84yhNPbWZ6a8xaB7In4zwSfHm9ZVfdxuTGqZmK7LJhb+ufB1+I\n1dbTDneSRikliUsiYkWy+B5gRZTcAGwF9qh+b0Qsj4hFEbFo7ty53Qu6xzy8QHNaubKemt7CJdfd\nNTNe06NPTs8kicp12hnrZ+mV63MZP2jxwgl+csZx3HHuCZz/jkMKMRHSIPE4T7X17I5CkoALgQ0R\ncV7FSyuBY4FrJP0OsD3wUA9CLKR65dSdLHLo9K14J7bfTDFNpWYaiWdNxivXTM66i8ljm1BKGvWO\nk4tPsvGFWG29LHo6CngXsE7S2mTZx4CLgIsk3Qw8A5xaXew0zGqdADtZ5NDpW/G07QOZT3bVxTR5\nyJqM065GO5HgXXySXS8uxPqBO9z1oW5eLXZ6Xoh62x8fG+XpzVs7NsxGq9qZj+AFZ3wr9Y4l785y\nnf7OBtkwzUUBnuFuoKUVOeSt07fi9bZTq6imUaV9vQRaeXdxb1IP0cjoiNhp++14bKp+y6hmNWrW\nmvcVv4tPsqv+rbjYrsSJwlJ1+la8mb4Bleqd7BoVYZ39b+t59Mna9QS17LT9dix904G5nCCaqS/J\ns+Wai0/a080LsX7hYcYtVaeHXK63/d12HK25fr2TXb3WKkuvXM/p37yppSQBpTuavIbEqB6yu568\nrvg9TLblzYnCUnVyXoh623/LYRPUqjpLO9mlFWFNb8lWD5dns8hys9bbzz2BiQ5PjNPp78yGjyuz\nrS15V6zXq3jebcdRzjpxdlFQ5b7nSGzpwG9ZwO3nnpDrNoetwtSKy5XZ1nGdaIZZqwgJYMftt9sm\nSZx+2U0zHeNqJYnROaXksbWJ/FHdy7msE+X6rjC1fuNEMQTKV96TG6cYSa68JxrMtNbMnULaeErL\nrrqt6ZNfZXz1VBctLb1y/Ta9p4GZOoBdx0b5zTOb2bq14e6B2kmik+X6eVeY9kMHu36IMW+d/Mzd\nPJ4uehpwrfQhKF9V71Y11hE8WzQCzTczrVWcUmta0MtXTzaMr7oPwIIzvlV33TvOPaFuX4JmjY+N\nZmr11IuTYT8UZfVDjHnr5GfOa9uFn7jIuqNeUU4t5RN/vbGOzv639Zy5Yt3MmEiNVFcGl3/c5fdP\nbpzikuvuahhfliv7dlsQ7bTDdpmSRPXn68ZkQv0wPlE/xJi3Tn7mbh9PJ4oBl2cnq0efnG65d3Pl\n/tNmaEtT6yqpXvPZOSqdsNutW8hy3Hp1MuyHDnb9EGPeOvmZu308nSgGXK87WVXuP8uPeGJ8rOaV\n/VknHsjoyLa9ErYGnLliHcfuP3ebvgStyHLcenUyrBdrr7/7Sv0QY946+Zm7fTydKAZcrc5XWYyN\njjA+VvsqfmJ8jL97xyENO3nV+xHX64SWVuS0eOEEy956MCPa9t1T01u45tYHt+lLcMqR82eej4+N\n1kw0jfabplcnw37oYNcPMeatk5+528fTrZ4GXPUoqrVaPdWaBKfWWEdA3ZFrm2nyWW/k27ccNjET\nR2V8jSqCFy+c4IOXrq352r0bpxq2LKrXGixrBXS3R/Yt64fmtv0QY946+Zm7fTzd6smA5lvrtNuq\nJ+9WQUUbKXUYm4Ba/2q21ZMTheWq2yfKXjW77Hb7eMjn6jHvuyjrb04U1nWDeNKut79uto8fnSMQ\ns8asyrK/tD41g96nwWpzorCuq1cMNCKxNaIQRVV56GRxVysdBVvdX6Nte2Kj4dO1sZ4kfTLZzlpg\nbUT8V7vbtP5UrxloeRymWmNBpSWCTowllUfi6UX7+Dz21
2j9Qe7TYO1pqXmspFOql0XEJ4ALgMeA\n35f0f3KKzfpMM81AKzugNerJnHcHtrx6TveifXwe+2u0/iD3abD2tNqP4l2SLpA0qwFvRNwfEVdF\nxGci4j05xmd9pNk+G+Ur10aJIO8r97wST7fbx4/O0TZ9PrLsL+37GfQ+Ddae1EQh6UBJl1Qsej0w\nBfxA0tyORmZ9p3rCnFqd4eDZK9dGiaDZK/eVayY56twf8IIzvsVR5/6g7h1CXomnkxMD1dr2srcd\nzLK3Htz2/iq3Dc9+P57YyBppVEfx78DLy08iYitwhqSTgB9LOo9S3cTNEfFk58K0flHZya1e66Dy\nlWujuZ2b6cDWSj1GnnNJd3Je5XrbzisROSFYqxoVPb0WOKdygaQ3An8KPAMcCnwOuFvSf3ckQutb\nja68GxXhNHPl3kpx0jAOI2GWh9Q7iohYB7yz/FzS7cAtwPkRcXXlupL2aWXHkuYBFwN7URo9YnlE\nXFDx+ocpJaG5EfFQK9u27kprSZR2BdvMMASNroBbKU4axmEkzPLQavPY10fErbVeiIh7WtzWZuDD\nEXGjpF2A1ZKujohbkiTyWuCuFrdpXdZuE9Z2i0JaLU5y0YtZ61pq9VQvSWQREfdFxI3J403ABqD8\nF3w+8FGam67AeqjXE9LUayX05DObG1ZuF0WzlfFmvVKI0WMlLQAWAtdLejMwGRE3qU6rGSuOXk9I\nU12cVJ4r+9Enp4F8Oul1Uic6FZrlrefzUUjaGbgcOI1ScdTHgE808b4lklZJWvXggw92OEqrpwgT\n0ixeOMFPzjiO2889gZ122G7WmEhQ7Ck3e31HZtaMniYKSaOUksQlEbEC2Bd4AXCTpDuAfYAbJT23\n+r0RsTwiFkXEorlz3aWjV4rWkqjXdzit6rd4bThlThTVJ+9aJ/MG7xdwIbAhIs6DUiuriNgzIhZE\nxALgHuDQiPh11jitszrZ+SyLItzhtKLf4rXh1E4dxYXACSnPGzkKeBewTlJ5mrKPRcS324jJeqBI\nLYl6NctcVv0Wrw2nzIkiIk5Ie97E+6+l/nTJ5XUWtB6ZDbN+6yvRb/HacMo0H4WktwHfjYhNkj5O\nqYf230TEmrwDbIbnozAza12z81FkraP46yRJHA28mlKx0z9k3JaZmRVY1kRRLlA9gdLQG98Cts8n\nJDMzK5KsiWJS0j8CfwB8W9IObWzLzMwKLOvJ/e3AVcBrI2IjsDtwem5RmZlZYWRt9TQF7AScDHwS\nGAU25hWU9ad25qPOYy5rM+uMrHcUXwKOpJQoADYBX8wlIutL7cxHnddc1mbWGVkTxRER8X7gKYCI\neBRXZg+1dsYs8nhHZsWWtehpWtIIyTDgyfzZW3OLyvpOO2MW1VtncuMUK9dMDmUR1CAU4xUlDmtf\n1kTxBeAKYE9J5wBvBT6eW1TWd9qZj7ree4GhHHK7naHHOzFseZYTvodPHyyZEkVEXCJpNfAqSsNw\nLI6IDblGZn3l2P3ncsl1d82aaaqZMYtWrpnkyWc21329XARVfXJp92q18v3jO44SAY9NTbd15ZvX\nFXRaUVyj7bXz3lqynvDzjsN6q52xnm4FcpvxzvrXyjWTXL56claSEPCWw9IHC6w+CdVTXTTV7tVq\n9fvLkxxl2VZeMVXqRDFe1mHLs57wPXz6YMlUmS3pK5LGK57vJumi/MKyflLrZBLANbemTyhV6321\nVBdftVv53Wi/WSrS86yQb2fo8byHLc96wvfw6YMla6unlyYd7YCZVk8L8wnJ+k0zJ5Na80I3c3VZ\nq/iq3avVdq7MW10/yxV0O5NB5T2RVNYTftEmtLL2ZE0UcyTtVn4iaXcKMv+2dV+jk0m9fhLjO47W\nfN+IlDoJUrtXq+1cmbe6fpYr6HYmg8p7IqmsJ/yiTWhl7cl6cv888DNJlyXP3wack09I1m8aTb5T\nr1hmh+3mMDY6ss37G
p1Q2p3sp9b7KylZpxV5T0DUzmRQeU4k1c58GUWa0Mrak7XV08VJq6djk0Un\nRcQt+YVl/aTRyaRe8ctjU9Oc/45DWj4JtTvZT3m90y5dW/P1oPUK6EGegMgnfMs0cVHReOKiYjvq\n3B/U7CcxMT7GT844rgcRlRQ1LrNu6cjERZKuTf7fJOnxin+bJD2eNVgbbEWt2CxqXGZF01LRU0Qc\nLUnAgRFxV4disgFT1GKZosZlVjRZ58xeFxEHdSCeTFz0ZGbWumaLnrK2erpR0uER8Z8Z329Dot6w\nFh4wzqx/ZE0URwCnSLoD+A2lFoURES/NKzDrf/WGtVh15yNcvnrSA8aZ9YmsieL4dncsaR5wMbAX\npRaJyyPiAknLgBOBZ4BfAu+u7AVu2ZWv4ic3TpUye7J8tx1HOevEA9s6SVffIRy7/1y+fv3dbKkq\n2pya3lJ3uQeMMyumrD2z7wfeApwPnAeclCxrxWbgwxFxAKXZ8t4v6QDgauAlyd3JfwFnZozRKlT2\njgZmDeD36JPTnP7NmzLPKFer5/W/XHfXNsmgrN7yekONm1lvZU0UFwMHAv8b+HvgAOCrrWwgIu6L\niBuTx5uADcBERHwvIsrjTl8H7JMxRqvQaCC86S2ReUa5Zgf3a2REansbZpa/rEVPL0nuBMqukZS5\nZ7akBZQGFby+6qU/Bi6t854lwBKA+fPnZ9310OjEQHhZ3lc9ZEelencaZtZbWe8obpR0ZPmJpCOA\nTO1TJe0MXA6cFhGPVyz/K0rFU5fUel9ELI+IRRGxaO7cuVl2PVQ6MRBeq+8bkWYGiqul3nIz662s\nieIw4KeS7khaPv0MOFzSOkk/b3YjkkYpJYlLImJFxfI/At4IvDMGYYyRAqjVC7nS6Igy90hutG0o\n3Ul8/u0Hs3jhhHtEm/WZrEVPr2t3x0kP7wuBDRFxXsXy1wEfBX4vIp5sdz9WUtkLOe9WT7V6OB+7\n/1yuufXBmv0k3CParL/0bFBASUcDPwbWAVuTxR8DvgDsADycLLsuIt6bti33zDYza12ne2a3LSKu\npdRRr9q3ux2LmZnVl7WOwszMhoQThZmZpWqp6EnSh9Jer6yUNjOzwdBqHcUuyf/7AYcDVybPTwRu\nyCsoMzMrjlYnLjobQNKPgEOToTeQtBT4Vu7RmZlZz2Wto9iL0uiuZc8ky8zMbMBkbR57MXCDpCuS\n54uBr+QTkpmZFUmmRBER50j6DvDKZNG7I2JNfmGZmVlRZCp6SobfOADYNSIuAB6W9LJcIzMzs0LI\nWkfxJeDlwMnJ803AF3OJyMzMCiXznNkRcaikNQAR8aik7XOMy8zMCiLrHcW0pBGSAUglzeXZgf3M\nzGyAZE0UXwCuAPaUdA5wLfDp3KIyM7PCyNrq6RJJq4FXURoBdnFEbMg1MjMzK4RMiULSZyLiL4Fb\naywzM7MBkrXo6TU1lr2+nUDMzKyYWh099n3AnwH7Vs2NvQvw0zwDMzOzYmi16OlrwHcoVVyfUbF8\nU0Q8kltUZmZWGC0VPUXEYxFxB6VBAB+LiDsj4k4gJF3UiQDNzKy3stZRvDQiNpafRMSjwMJ8QjIz\nsyLJmijmSNqt/ETS7mTv5W1mZgWW9eT+eeBnki5Lnr8NOCefkMzMrEiydri7OOlwd2yy6KSIuCW/\nsMzMrCgyFxdFxHpgfdb3S5pHaQKkvSiNGbU8Ii5IirEuBRYAdwBvT+pAzMysB1qqo5B0bfL/JkmP\nV/zbJOnxFve9GfhwRBwAHAm8X9IBlJrdfj8iXgx8n9nNcM3MrMtauqOIiKOT/3dpd8cRcR9wX/J4\nk6QNwATwZuCYZLWvAD8EPDSImVmPtNoz+0Npr0fEeVmCkLSAUvPa64G9kiQC8GtKRVO13rMEWAIw\nf/78LLs1M7MmtNo8dpfk3yLgfZTuACaA9wKHZglA0s7A5cBpETGr+CoigmTOi2oRsTw
iFkXEorlz\n52bZtZmZNaHVoqezAST9CDg0IjYlz5cC32p155JGKSWJSyJiRbL4fknPi4j7JD0PeKDV7ZqZWX6y\ndrjbi9IwHmXPUKeIqB5JAi4ENlQVWV0JnJo8PhX414wxmplZDrI2j70YuEHSFcnzxZQqnltxFPAu\nYJ2ktcmyjwHnAt+Q9CfAncDbM8ZoZmY5yNrh7hxJ3wFemSx6d0SsaXEb11KaHa+WV2WJy8zM8pep\n6CkpNjoA2DUiLgAelvSyXCMzM7NCyFpH8SXg5cDJyfNNwBdzicjMzAolax3FERFxqKQ1UBpmXNL2\nOcZlZmYFkfWOYlrSCEkfB0lzga25RWVmZoWRNVF8AbgC2FPSOcC1wN/mFpWZmRVGy0VPSUX2j4DV\nlFonCVgcERtyjs3MzAqg5UQRESHp2xFxEHBrB2IyM7MCyVr0dKOkw3ONxMzMCilzqyfgFEl3AL+h\nVPwUEfHSvAIzM7NiyJoojs81CjMzK6xW56N4DqUhxV8ErAMujIjNnQjMzMyKodU6iq9QmotiHfB6\n4PO5R2RmZoXSatHTAUlrJyRdCNyQf0hmZlYkrd5RTJcfuMjJzGw4tHpHcbCk8nSlAsaS5+VWT7+V\na3RmZtZzrU6FOtKpQMzMrJiydrgzM7Mh4URhZmapnCjMzCyVE4WZmaVyojAzs1ROFGZmlsqJwszM\nUvU0UUi6SNIDkm6uWHaIpOskrZW0StLLehmjmdmw6/UdxZeB11Ut+yxwdkQcAnwieW5mZj3S00QR\nET8CHqleDJSHAtkVuLerQZmZ2SxZJy7qpNOAqyR9jlIie0WtlSQtAZYAzJ8/v3vRmZkNmV4XPdXy\nPuCDETEP+CBwYa2VImJ5RCyKiEVz587taoBmZsOkiIniVGBF8vgywJXZZmY9VMREcS/we8nj44Bf\n9DAWM7Oh19M6CklfB44B9pB0D3AW8B7gAknbAU+R1EOYmVlv9DRRRMTJdV46rKuBmJlZXUUsejIz\nswJxojAzs1ROFGZmlsqJwszMUjlRmJlZKicKMzNL5URhZmapnCjMzCyVE4WZmaVyojAzs1ROFGZm\nlsqJwszMUjlRmJlZqiJOhdoVK9dMsuyq27h34xR7j49x+vH7Acxaduz+c7nm1gdnrbN44cTMeyc3\nTs1sb0Ti5CPm8anFB/Hxlev4+vV3syVi5vWJqvcvvXI9G6emAdhtx1HOOvHAbbY9R7A12YQoTSY+\nPjaKBI8+Oc2IxJaImf/HRufw9OatbI1SPEe+cDfueHhqm89Yve8DnrcLP/3VI1SEy/jYKEvfVIoJ\nqPmZKh217+5c8p6XZzrW4zuOEgEbp7b9TBM1vod630va/ipfb0ZRtmFWTzd/X4o6f/j9ZNGiRbFq\n1aqm11+5ZpIzV6xjanrLzLLkIdRbAAAKXUlEQVTREUHA9Nb6x2NsdIS3HDbB5asnZ7230ov33Ilf\nPPCb1PdfesPd2+xndES84/B5qdtu1+ic0sk35SNus/6ytx3Mqjsf4V+uu6vh+rWSRdZj3Yqx0RE+\nfdJBM4m2en+VrzejKNswqyev35ek1RGxqNF6Q1n0tOyq27Y5GU9viYYnrqnpLXz9+rtTT+T1kkTl\n+2vtZ3pLNNx2u6a3Np8kyusvu+o2vn793U2t/5NfPrLNsqzHuhVT01tYdtVtdfdX+XozirINs3q6\n/fsayqKneyuKjFpVr+glj/e3u+1OuHfjFO1E1c6xzrKfevtrJY6ibMOsnm7/vobyjmLv8bHM7x2R\n2tp32vvb3XYn7D0+1lZc7RzrLPupt79W4ijKNszq6fbvaygTxenH78fY6MisZaMjYnRO+glxbHSE\nk4+Yt817K714z50avr/WfkZH1HDb7RqdIxp8xG3WP/34/Tj5iHlNrX/UvrtvsyzrsW7F2OjITAV5\nrf1Vvt6MomzDrJ5u/75Gli5d2pENd9Py5cuXLlm
ypOn193/eb7HPbmOsm3yMJ57azMT4GEvfdCCv\nPfC5s5a9+ZC9efiJZ2aef+LEA/izY180895NT22e2eaIxDuPnM+X330EDz3xNOsnH59VZFP5/vm7\n78h1v3qYpzZvBUotj875/YO22facUp0vUGr1BKXWSGPbj/DU9FZGJCLZdwBjo3PYGjGz7BX77s7W\nYNZnPP7A526z78Pmj3NP1S3r+Ngof5tUjB23/141P1Oleq2emjnWu+04ynO2G+Gpzdt+plrfQ63v\npVyBV2t/la9n/X30Yhtm9eT1+zr77LPvW7p06fJG6w1lqyczM3OrJzMzy4kThZmZpeppopB0kaQH\nJN1ctfzPJd0qab2kz/YqPjMz6/0dxZeB11UukHQs8Gbg4Ig4EPhcD+IyM7NETxNFRPwIqO7O+z7g\n3Ih4Olnnga4HZmZmM3p9R1HL7wCvlHS9pP+QdHitlSQtkbRK0qoHH3ywyyGamQ2PIiaK7YDdgSOB\n04FvSNt2DY6I5RGxKCIWzZ07t9sxmpkNjSIminuAFVFyA7AV2KPHMZmZDa0iJoqVwLEAkn4H2B54\nqKcRmZkNsZ6OHivp68AxwB6S7gHOAi4CLkqazD4DnBqD0H28hzyBjpm1o6eJIiJOrvPSKV0NZIBV\nT3AyuXGKM1esA3CyMLOmFLHoyXLkCXTMrF1OFAPOE+iYWbucKAacJ9Axs3Y5UQw4T6BjZu0ayjmz\nh0m5wtqtnswsKyeKIbB44YQTg5ll5qInMzNL5URhZmapnCjMzCyVE4WZmaVyojAzs1QahPH2JD0I\n3NnrONq0Bx4lt5KPx2w+Hs/ysZitnePx/IhoOKHPQCSKQSBpVUQs6nUcReHjMZuPx7N8LGbrxvFw\n0ZOZmaVyojAzs1ROFMWxvNcBFIyPx2w+Hs/ysZit48fDdRRmZpbKdxRmZpbKicLMzFI5UfSApIsk\nPSDp5oplu0u6WtIvkv9362WM3SJpnqRrJN0iab2kDyTLh/V4PEfSDZJuSo7H2cnyF0i6XtJ/S7pU\n0va9jrVbJI1IWiPp/yXPh/lY3CFpnaS1klYlyzr+t+JE0RtfBl5XtewM4PsR8WLg+8nzYbAZ+HBE\nHAAcCbxf0gEM7/F4GjguIg4GDgFeJ+lI4DPA+RHxIuBR4E96GGO3fQDYUPF8mI8FwLERcUhF34mO\n/604UfRARPwIeKRq8ZuBrySPvwIs7mpQPRIR90XEjcnjTZROCBMM7/GIiHgieTqa/AvgOOCbyfKh\nOR6S9gFOAP4peS6G9Fik6PjfihNFcewVEfclj38N7NXLYHpB0gJgIXA9Q3w8kqKWtcADwNXAL4GN\nEbE5WeUeSsl0GPwd8FFga/L8txneYwGli4bvSVotaUmyrON/K57hroAiIiQNVbtlSTsDlwOnRcTj\npQvHkmE7HhGxBThE0jhwBbB/j0PqCUlvBB6IiNWSjul1PAVxdERMStoTuFrSrZUvdupvxXcUxXG/\npOcBJP8/0ON4ukbSKKUkcUlErEgWD+3xKIuIjcA1wMuBcUnlC7t9gMmeBdY9RwFvknQH8H8pFTld\nwHAeCwAiYjL5/wFKFxEvowt/K04UxXElcGry+FTgX3sYS9ckZc4XAhsi4ryKl4b1eMxN7iSQNAa8\nhlK9zTXAW5PVhuJ4RMSZEbFPRCwA/gD4QUS8kyE8FgCSdpK0S/kx8FrgZrrwt+Ke2T0g6evAMZSG\nB74fOAtYCXwDmE9pyPS3R0R1hffAkXQ08GNgHc+WQ3+MUj3FMB6Pl1KqkByhdCH3jYj4pKQXUrqq\n3h1YA5wSEU/3LtLuSoqePhIRbxzWY5F87iuSp9sBX4uIcyT9Nh3+W3GiMDOzVC56MjOzVE4UZmaW\nyonCzMxSOVGYmVkqJwozM0vlRGFmZqmcKGxgSFosKSSlDnkhaVzSn7W5ryfqLN+SDAF9s6TLJO1Y\nZ72ftrP/Zkn
63WRo6jnJ8xFJ35P0h93Yvw0GJwobJCcD1yb/pxkH2koUKaaSIaBfAjwDvLfyRZXM\niYhXdGj/s0TEBko9u9+YLDoHuC0iLu7G/m0wOFHYQEgGFTya0twEf1Cx/A8l/TyZCOiryeJzgX2T\nK/9lkhZUTSL1EUlLk8crk5E611eM1tmsHwMvSrZ/m6SLKQ25MK/yjqROjEg6JZnEaK2kf0zuBnaS\n9K1k3ZslvaOJOM4H3ifpLZTGT/pQi5/DhpxHj7VB8WbguxHxX5IelnQY8BTwceAVEfGQpN2Tdc8A\nXhIRh8DM8Ob1/HFEPJKMu/Sfki6PiIcbBZMMWvd64LvJohcDp0bEdcnr5fUOrBWjpN8F3gEcFRHT\nkr4EvBP4DXBvRJyQrLdr8v+3gT+NiHurY4mI70n6PPBp4H9ExHSj+M0q+Y7CBsXJlMb/Ifn/ZEqj\njV4WEQ8BZBz/5i8k3QRcB8yjdMJPM5bMJbEKuIvSgIcAd5aTRJV6Mb4KOIxSclqbPH8hpTGxXiPp\nM5JeGRGPJe97Q60kUeGnwHkR8evyAkl/0+CzmAG+o7ABkFyFHwcclIzFP0JpgpdlTW5iM7Mvmp6T\nbPcY4NXAyyPiSUk/LL+WYqp8p1IRH5TuBFoh4CsRceY2L0iHAm8APiXp+xHxySa2dwDwzxXbeC6l\n2fPMGvIdhQ2CtwJfjYjnR8SCiJgH3A78HHhbMromFUVPm4BdKt5/P7CnpN+WtAPPVvzuCjyaJIn9\nKc3pnbcf1Inx+8BbkwlqkLS7pOdL2ht4MiL+hVIiPLTJ/RxIqX6k7BBgbR4fwAafE4UNgpN5dvjl\nssspVWqfA/xHUnx0HkBSx/CTpDJ4WVJm/0ngBkpTj5ZnDfsusJ2kDZQqwGsVHbUlItbXifEWSnUX\n35P08ySu5wEHATckxVFnAZ+CUh1FkkS2IWkepelDK5v0OlFY0zzMuNkQknQh8J6I2NpwZRt6ThRm\nZpbKRU9mZpbKicLMzFI5UZiZWSonCjMzS+VEYWZmqZwozMwslROFmZmlcqIwM7NUThRmZpbq/wNP\ni6cKUGWQlQAAAABJRU5ErkJggg==\n",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "8MqX73B4s5tv",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Multiple Linear Regression \n",
-        "- Our mean squared error for Simple Linear Regression looks fairly high.\n",
-        "  - Let's try Multiple Linear Regression (predicting based on multiple variables rather than just `TAX`) and see if that produces more accurate predictions\n",
-        "\n",
-        "1. Set `mX` to contain all columns that are not `PRICE` from the unsplit data\n",
-        "  - i.e. `CRIM`, `ZN`, `INDUS`, `CHAS`, `NOX`, `RM`, `AGE`, `DIS`, `RAD`, `TAX`, `PTRATIO`, `B`, `LSTAT`\n",
-        "  - Set `mY` to still represent just 1 target value (`PRICE`)\n",
-        "    - also from the unsplit data\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ZtQK5-f4M0Vg",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# set X to all variables except price\n",
-        "mX = bos.drop('PRICE', axis=1)\n",
-        "# and, like in the simple Linear Regression, set Y to price\n",
-        "mY = bos['PRICE']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "RTYG4-UwNDsK",
-        "colab_type": "text"
-      },
-      "source": [
-        "2. Split the data into `mX_train`, `mX_test`, `mY_train`, and `mY_test`\n",
-        "  - Use `cuML`'s `train_test_split`\n",
-        "    - And the same 70:30 train:test ratio"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "EsKxK8u_F7t8",
-        "colab_type": "code",
-        "outputId": "673a1a44-4d2f-4a45-8333-8f29782eaf65",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 86
-        }
-      },
-      "source": [
-        "# train/test split (70:30)\n",
-        "mX_train, mX_test, mY_train, mY_test = train_test_split(mX, mY, train_size = 0.7)\n",
-        "\n",
-        "# see what it looks like\n",
-        "print(mX_train.shape)\n",
-        "print(mX_test.shape)\n",
-        "print(mY_train.shape)\n",
-        "print(mY_test.shape)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "(354, 13)\n",
-            "(152, 13)\n",
-            "(354,)\n",
-            "(152,)\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_Y40R17LGHsI",
-        "colab_type": "text"
-      },
-      "source": [
-        "3. fit the model with `mX_train` and corresponding `PRICE` (*mY_train*) values \n",
-        "  - so it can build an understanding of their relationships \n",
-        "4. predict `PRICE` (*mY_pred*) for the test set of independent (*mX_test*) values\n",
-        "  - and compare `PRICE` predictions to actual median house price (*mY_test*) values\n",
-        "    - use `sklearn`'s `mean_squared_error` to do this"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "N7qm1HuVO-1k",
-        "colab_type": "code",
-        "outputId": "7e291cec-e602-4ad9-a5b3-b70d7261f63d",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "# call Linear Regression model\n",
-        "mlr = LinearRegression()\n",
-        "\n",
-        "# train the model for multiple regression\n",
-        "mlr.fit(mX_train, mY_train)\n",
-        "\n",
-        "# make predictions for test X values\n",
-        "mY_pred = mlr.predict(mX_test)\n",
-        "\n",
-        "# calculate error\n",
-        "mmse = mean_squared_error(mY_test, mY_pred)\n",
-        "print(mmse)"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "16.691811854229723\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "jTdmleXCM_Xb",
-        "colab_type": "text"
-      },
-      "source": [
-        "5. visualize with `matplotlib`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Q83NFMK1JKvL",
-        "colab_type": "code",
-        "outputId": "569cfa77-a66e-4b1b-9d70-ae4ef8e7936e",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 305
-        }
-      },
-      "source": [
-        "# scatter actual and predicted results\n",
-        "plt.scatter(mY_test, mY_pred)\n",
-        "\n",
-        "# label graph\n",
-        "plt.xlabel(\"Actual Prices: $Y_i$\")\n",
-        "plt.ylabel(\"Predicted prices: $\\hat{Y}_i$\")\n",
-        "plt.title(\"Prices vs Predicted prices: $Y_i$ vs $\\hat{Y}_i$\")\n",
-        "\n",
-        "plt.show()"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEgCAYAAACq+TSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X20HXV97/H3J4cTOCBygkYqAYxP\nCyQiCeJTQ7skSqMimiKKXLXc1tbV1q4K0tRwLy1I5RJXqlivWsuqtqBYAwKRVitYE66KRZuYQIhA\nfUDQg0JUjoIc4CT53j/27GRnZ8/sPbNnP39ea5119syeM/Pbc86Z78zv4ftTRGBmZpZmTq8LYGZm\n/c2BwszMMjlQmJlZJgcKMzPL5EBhZmaZHCjMzCyTA4WZmWVyoDCzoSTpNZJe0+tyDAN5wJ2ZDRtJ\nTwVuShZPiYif97I8g86BwsyGjqSPAtcDY8DrIuKdPS7SQHOgMDOzTG6jMDOzTA4UI07SNkkv73U5\n+oWkf5b0vuR1V85N7TFL2p9/p1YqB4ohI+mHkmYkPSLpgeQi9KS07SNiUUTc3MUitiXv52tHq+cm\nKdMrO1GGInr5O5U0T9LD9YFK0qckXSdJw3jsYedAMZxOi4gnAScAJwIX1G8gab+ul6o8w/75CumH\nzxwRDwGXA+dU10n6K+B5wFujg42ivTz2sHOgGGIRMQX8O/B82H3n+x5JtwO/lrRf7d2wpCOTO6/t\nkn4u6SPVfUk6XNK1yXv3SPrzmvfeI2kquZu7W9Ir6suSbPO5unV/J+nDre6j4OdLLXfyM0skfTs5\n7lrggJr39npSaHR+JH0KOAr41+Qp5y9bOF+px2wkKcf5kr4j6SFJ/yTpgIzP3LTczcpY9HeS+CCw\nXNKzJL0ReAeVnkePZnzGsv4+ch/bWhAR/hqiL+CHwCuT10cC24C/qXlvS7J+onZ7Kt0IbwMuAw6i\ncvE6KdlmDrAJ+GtgLvAs4AfAcuBo4EfA4cm2C4FnNyjXM4BHgYOT5THgJ8BLW91H3s+XVe5k+7nA\nvcC5wDhwBjALvK/BsbLOz+7tWjhfmcfM+Mx3JJ/rUOCWujI2/J1mlbuFc5P6OwE+Bnysyd/hJ4Ev\nAduBE1r4uy3l76PIsf3VwnWl1wXwV8m/0MpF4hFgOrkgfazuAvIHDbZ/JfCy5B9rvwb7fAlwX926\n84F/Ap4DPJjsY7xJ2b4O/F7y+hTg+8nrPPto+fNllTt5/dvA/STdxJN136BxoMg6P7u3a+F8ZR4z\n4zP/cc3ya2rOXervNKvcLZybln8nKWV+PhDAm+rW/xnw3E79fRQ9tr+yv3pep2kdsSIi/iPlvR+l\nrD8SuDcidjR47xnA4ZKma9aNAV+LiO9JOge4CFgk6Ubg3RFxf4P9fAY4C7gS+B/JMjn3kefzpZY7\neX04MBXJVSRxb8p+s85Pvazj5jlmrdrPdW+yn0bv1Usrd+a5KfA7qTcXeBy4rnZlRHyk8eZAeX8f\nRY5tGdxGMXrSGvR+BByV0iD6I+CeiJis+To4Il4DEBGfiYiTqFx8Anh/yjGuAV4u6Qjgd0kuBDn3\n0Uzt58ssN5WqjQV1vWGOStlv1vmpP6dZx81zzFpH1m1fe5HMaqRNK3ezc9Pu7+R44I76ACXp5oyf\nKevvo8ixLYMDhVV9i8pFbLWkgyQdIGlpzXsPJw2KE5LGJD1f0oskHS1pmaT9gceAGWBXowNExHbg\nZipVMPdExJ0AefZR4DM1LHfy/n8CO4A/lzQu6XTgxRn7Sjs/D1Cp42/luHmOWeudko6QdCjwv4G1\nOc5Bo3JnnpsSfieLqbSd7KZK/qUH036gxL+P3Me2bA4UBkBE7AROo1IffB/wY+DMmvdeS+Uf8B7g\nZ8A/AocA+wOrk3U/BZ5Gpa4
7zWeo1DV/pmZd3n3k+Uxp5SYingBOB/4n8Asqn/e6jH01PD/ApcAF\nkqYl/UXWcfMcs85nqCS5+wHwfaClAXpp5W52bsj4nUj6uKSPNzn08dRdrIEXAFub/FwZfx9Fj20p\nnOvJrM9J+iHwhxntMgMhaWf4YUSsG6VjDwM/UZhZtxwH3D6Cxx547vVkZl0REW8fxWMPA1c9mZlZ\nJlc9mZlZJgcKMzPLNBRtFE996lNj4cKFvS6GmdlA2bRp088iYn6z7YYiUCxcuJCNGzf2uhhmZgNF\nUivpY1z1ZGZm2RwozMwskwOFmZllcqAwM7NMDhRmZpZpKHo9mZn1g3Wbp1hz493cPz3D4ZMTrFx+\nNCuWLOjIfso6ViscKMzMSrBu8xTnX7eVmdmdAExNz3D+dZXM5nku4K3sp6xjtarnVU/JhCmbJf1b\nsvxMSd+U9D1JayXN7XUZzcyaWXPj3bsv3FUzsztZc+Pdpe+nrGO1queBAngXcGfN8vuByyLiOcBD\ngLM+mlnfu396Jtf6dvZT1rFa1dNAkcyNeyqVmbVI5hFeBnwu2eQKYEVvSmdm1rrDJydyrW9nP2Ud\nq1W9fqL4EPCX7Jn/9inAdM2k6D8GGla4SXqHpI2SNm7fvr3zJTUzy7By+dFMjI/ttW5ifIyVy48u\nfT9lHatVPQsUkl4LPBgRm4r8fERcHhEnRsSJ8+c3zWllZtZRK5Ys4NLTj2PB5AQCFkxOcOnpx+Vu\nXG5lP2Udq1U9m7hI0qXA24AdwAHAk4HrgeXAb0TEDkkvAy6KiOVZ+zrxxBPDSQHNzPKRtCkiTmy2\nXc+eKCLi/Ig4IiIWAm8G1kfEW4ANwBnJZmcDn+9REc3MjN63UTTyHuDdkr5Hpc3iEz0uj5nZSOuL\nAXcRcTNwc/L6B8CLe1keMzPboy8ChZlZv+tmyox+40BhZtZEt1Nm9Jt+bKMwM+sr3U6Z0W8cKMzM\nmuh2yox+40BhZtZEt1Nm9Bu3UZhZYaPSwLty+dF7tVFAZ1NmtMLzUZhZ3xulBt7q5+mXoNjtc+9A\nYTbEOnnXmdXAO2yBAioX4H75XN0+926jMBtS1bvOqekZgj13nes2T5Wy/7SG3KnpGZauXl/acWxf\nIzUfhZl1Tqe7dGY15JYdlGxvozYfhZllWLd5iqWr1/PMVV/IfZfe7I6/yD5rNZoTodYojTPotm7P\nR+E2CrM+1W6D5eGTE0w1CBZK9lVkn7VqG3gbHQdGZ5xBt3W7cb1n81GUyfNR2DBaunp9wwvwgskJ\nblm1rOnP1wcaqASJRv/xre6zU2W13uj7+SjMLFu7DZaNZkFLuy1s986/21Uh1l2uejLrU2lVR3ka\nLGu7dK7bPMW5a7c0DBbtNoL22ziDfjEsAxIdKMz6VNmjgdfceHfDIKHkWO3qp3EG/WCYBiQ6UJj1\nqbLv0tOql4LsC1e7d8XDcled1zANSHSgMOtjZd6lp1VlLciodlq3eYqV19zG7K7Ks8jU9Awrr7lt\nd9maGaa76ryGKeOsG7PNRkSRBueLbti2O0hUze4KLrphW0vHHOV5HIYp46yfKMyGQCvVO0WqsqZn\nZnOtrzdMd9V5dTrjrLPHmlnLGlXvnLt2C+es3cKCugtItxucy+i5Nag62RPM2WPNLJdG1TvVyqJ2\nLyDzDhznoUf3fXqYd+D47tdZd7b9OI9DN3UqMDt7rJnl0qwap502gQtPW8T4mPZaNz4mLjxtEdA8\nQ22jQX+Xnn7c0Ddkd1q3q/T8RGE24NKqd2oVvYA0qz5Ju7M97+o9PaM8vqJ83a7Sc6AwG3CNqnfq\ntXMBybrQpwWgnREj0w22F7pdpedAYdZj7fZeqc/iWp/4r5MXkKynmUEdXDYInD22AGePtUHVKMPr\nxPhYW/X43ew22aj8tQTcs/rUjhzb2tdq9lg/UZj1ULMBaUUu+N1sE6ge57yrb2Nng5vOUegGO
woc\nKMx6KGsWurR+8tBfWVqrxx7lbrDDzoHCrIcOmRhvOMpZ0PBJ46IbtvH4jl19lzvJacaHmwOFjZR+\ny2QqNV6f1nLYKKj0S6Oxu8EOLwcKGxn9mMl0usGo5yJGIXeS9Y5HZtvIyJPJdN3mKZauXs8zV32B\npavX7x5pXLa0xt55B443zPRamzqjlf2YlcFPFDbUaquaWp0vuptPHmkDp6opMuqrycCNxtZ9PQsU\nkg4Avgrsn5TjcxFxoaRnAp8FngJsAt4WEU/0qpw2uJr18a+qvxvvZsK1Zo3AacfL286ybvMUF92w\nbXcbx7wDx7nwtEVuU7CW9PKJ4nFgWUQ8Imkc+LqkfwfeDVwWEZ+V9HHg7cDf97CcNqAaXfDrNbob\n73bCtbyNwHm3r5+lDuChR2dZ+bnWZ6qz0dazNoqoeCRZHE++AlgGfC5ZfwWwogfFsyGQdWEXlbvq\n/febw7lrt+zVDjFMM5NBJWDWz1IHMLszRmKmOWtfTxuzJY1J2gI8CHwZ+D4wHRE7kk1+DPh2xwpJ\nu7AvmJzgsjMX89jsLqZnZvdJj11kytB+lhUw3VvKWtHTQBEROyNiMXAE8GLgmFZ/VtI7JG2UtHH7\n9u0dK6MNrqwLfrN2iGGaQyHrSWhQn5Ksu/qi11NETEvaALwMmJS0X/JUcQTQsF9iRFwOXA6VpIBd\nK6wNlAPG5+wOCJMT41z0ukoD7rlrtzTcvnqHPUyDx1YuP3qfNgqoTEA0qE9J1l09e6KQNF/SZPJ6\nAjgFuBPYAJyRbHY28PnelNAGWbXHU+00no/v2LX7ddqddEBHx01Uy9aNMRpVK5YsYM0bj2dyYs8Y\njHkHjrPmjOOHJhhaZ/UszbikF1BprB6jErCujoiLJT2LSvfYQ4HNwFsj4vGsfTnNuNVbunp9w3kS\nFkxOcMuqZU27zrab6jtNJ9KKmxXV92nGI+J2YEmD9T+g0l5hVlizLq71k/3U69S4iW6O0TAri1N4\n2FBqpYvriiULuGXVMlLy8nWkR1C3x2iYlcGBwoZSsy6ute0Ec1JSuHaiR9CwjdGw0dB21ZOki5P9\nbAG2RMR/t10qs0SetOD1277hhQvYcNf2fX62vp2g0cxsnRo3kZbbyb2PrJ/lChSS3hoRn65dFxF/\nLekwYDHwu5KeExF/VGYhbTTlSc7XaNtrN001bCROS+0xJrEroq15KpoFNk/wY4Mo7xPF2yS9CHh3\nROz+T4uIB4Abky+zUuRp+E3b9ryr981nlNYesCuCe1afWri8rQa2YRqjYaMhs41C0iJJV9WsejUw\nA6yXNL+jJbORl6fhN23bnRGcf91WLli3teNtEnnmuzAbJM2eKP6DymhpACJiF7BK0unA1yR9kErb\nxB0R8Wjnimmj6PDJiYZdVxtd0NPmnobKxfqqW+/bPR9Fp9ok3KPJhlWzXk+/A1xSu0LSa4E/BJ4A\nTgD+FviRpO91pIQ2svIk50ube7oqa1ipgDe8sP3qIPdosmGV+UQREVuBt1SXJd0DfIfKfBFfrt1W\n0hEdKaGNrOqF+73/um13Ko7992t8b9PO3NMBbLirklgyTy+reu7RZMMqb2P2qyPirkZvRMSPSyiP\n2T4em92To2l6ZnZ3AzHs6T00R2pYpQSVJ4ZmiWrun55pewpU92iyYdWzXE9lcq6n4ZWWs2lyYpzH\nd+xqOoPd5MQ4iw4/mFu+/4vM7RYk1UONjjUm8YE3OYGeDZ++z/Vko6VolU5aQ3Baw3X9WAhg95Sf\nWU4+Zj5X3Xpfw/eqPaeg8fgNP0HYsHMKD+u4apXO1PTMPrPJNZO3Ibg6FuKWVcsAOO/q25jd2fyp\necNd2zOP1aibazufy2yQOFBYx7UzviCt59O8A8cbbl+92Fcv4mntFvWmpmcaVjvVqn+68bgJGxWF\nq54k/UZE/DRt2ayqnfEFaQ3EQGYPo7Q0He2of+LI+7lcT
WWDqp02ik8Ap2YsmwH5Bs41kpXyIu3C\nW/Ygt/E5+04bmvW56oPCycfM59pNU4V7VJn1UuGqp4g4NWvZrKpR9RHArx/f0VZ9fnU+icvOXAzA\nOWu38Ozzv8jCjDQdhTXYXVq12MnHzN+n7eKqW+9zNZUNrEKBQtIbJR2cvL5A0nWS9pmtzgwqF/RL\nTz9un3aF6ZlZzlm7hSUX31Q4YNQ2KMOe9BxpaTo+dObi1ImKsszujH0u6tXPtWByAlHpYnvp6cex\n4a7t+wSFtJYSp/ewQVD0ieKvIuJhSScBr6RS7fTx8oplw2bFkgUcOLdxTedDj84W7i3UrC1iTNrr\nIr5iyYLCKTUaXdSrTzXVnlYrlizIdfF3eg8bBEUDRfU/81Tg8oj4AjC3nCLZsMq6gBathml2Ud4Z\nwWVnLt59EYf0qrBm6i/qtbPkLV29fnegS7v41z/JOL2HDYqijdlTkv6BStLA90vaH3e1tSbSGn+r\nWrkTr28kzsoaW1XfaFzfk2rywHEeeWwHs7vSu9LWX9Sz0n2k5XxKm3HPrN8VDRRvAl4F/G1ETEt6\nOrCyvGLZMGp0Aa2VdideDQ5T0zN75W1qNu6hqtFkR7U9qZauXr876WCtrBnvssZQVAf7uSusDYui\ngWIGOAg4C7gYGAemyyqUDafqhfKiG7bt8xSQVg1Tf+deNDNZ1tNKkRnvmo2h8Cx2NkyKBoqPAbuA\nZVQCxcPAtcCLSiqXDZhWB5NVL6BZ29e+l5UVNo9DJhqP5Ib0KrE5Eus2TzX8HO2ODTEbJEUDxUsi\n4gRJmwEi4iFJbsweUUXSc6fdcdfvq4wgAfDrJ3akXvTTqsSykgF67gkbJUUboGcljZHUBCTzZ+/K\n/hEbVmXmPOpE6g1oPA6iqjoeYqzBIL20z5E2hsLVTTaMij5RfBi4HniapEuAM4ALSiuVDZQy54ru\n5AC0rH2vWLKAc9duyfVzboewUVEoUETEVZI2Aa+g0j18RUTcWWrJbGCUWV/frAttO5qVx+0OZo21\nk+vproj4aER8xEFitKXlPCpSX9/qYDhRmb1uTov5OFopT5mfw2yYFM31dIWkyZrleZI+WV6xbJDk\nra9PG9Fcv680CyYnuOzMxTy+YxcZY+Rytx+43cGssUJzZkvaHBFLmq3rFs+ZPTguWLeVq269b6/x\nEBPjYw0vyPU9oGq3rQ7AS7NgcmL3wDcza6zVObOLVj3NkTSv5mCH4vm3rYkL1m3l03VBAio9i85Z\nu4UL1m3dva46lmJmdufu3khztGfbZu0YJx8zv+zim42sohf3DwD/KemaZPmNwCXlFMmG0brNU3z6\n1vsyt6m+f+IzDm04liKrmqnehru2Fyuome2jaK+nK5NeTycnq06PiO+UVywrW6+n4Wx1TMVVt97H\nv3zzR20PtPM8D2blKVxdFBHbgG0llsU6pMjI6bK1euEOyhmNnbdLa68DqVk/y9VGIenryfeHJf2q\n5uthSb/Kua8jJW2Q9B1J2yS9K1l/qKQvS/pu8n1es31ZtjJHThc1eWB6rqVOyNOltXaWvOrUpUUn\nUjIbRrkCRUScJEnAooh4cs3XwRHx5JzH3gGcFxHHAi8F3inpWGAV8JWIeC7wlWTZ2lDmyOki1m2e\n4pHHdnTlWADzDhzP9TRQZiDN6vprNqhyVz1FREj6AnBcOweOiJ8AP0lePyzpTmAB8Hrg5clmVwA3\nA+9p51ijLm3E8SET4yxdvb7j1S1rbrw7c1KgMo2PiQtPW5TrZ8oKpP1QxWfWCUW7x35bUmkpxSUt\nBJYA3wQOS4IIwE+Bw1J+5h2SNkrauH27e7hkaTTieHyO+PUTO/aqbll5zW0sufimUu+G122eKjUl\nx9gcMZ4yHPuguWMcNHc/zl27JVf509oz8rZzpD2ZnHf1bX6ysIFWNFC8BLhV0vcl3S5pq6Tbi+xI\n0pOozGVxTkTs1c4Rl
dGADW9FI+LyiDgxIk6cP9995rM0GnH8pAP2Y3bn3qd2dlfw0KOzpdXTV++w\ny7RzVzB3vzl7fZYPnbmYD525mF0B0zP5y19W6o60J5BqunIHCxtURXs9LS/j4JLGqQSJqyLiumT1\nA5KeHhE/SaZYfbCMY426+kynz1z1haY/Uz+FaLOeQfXvP/rEjo6kDP/1Ezu55Hf3PvbS1etT2xla\nSd0B7U9dmpXQsNWymPWjooHiAeBPgZOo3PF/Hfj7PDtIGsU/AdwZER+seesG4GxgdfL98wXLaBla\nzdJavUtuVv/e6P0sotITqtFc1a2ov+i2285QRsrwZnOCe2yHDaqiVU9XAouA/wt8BDgW+FTOfSwF\n3gYsk7Ql+XoNlQBxiqTvAq9Mlq1kraa4qNbTN6t/zzvhUACPze5iXsFus/UX3bLaGdqRNQFSt8ti\nVqaiTxTPT7q1Vm2QlGtkdkR8ncqNZSOvKFgua1ErKS5q6+mb1b8XqWKamd3J/vvNYWJ8LPfP1190\n+2Vq0upTST+Uxaws7fR6eml1QdJLAKdvHSBZ1SCNUmxn3Q3XJu7L65czs03TitfvudFFt59ShPdT\nWczKUDTN+J3A0UA1y9tRwN1UBtFFRLygtBK2oNtpxoch3cPS1esbtiOMSXzgTce3lPK7Xv2TwfiY\nIMgcQ1GbDjwtrfgbXriADXdtH+jzbdaPWk0zXrTq6VUFf27gDcugqrSG12pVEuz9eaqvz7v6toa5\nmBYkF/D6AArsnjtCsM88FCcfM3+vQX8OCmb9p9ATRb/p5hNF2p34IE6Us27zVOaFv9HnyZpMqNkF\nvf5J7ORj5nPtpqlC+zKz9nV64qKR1eu8Sc3kyTW0YskCdqXcKKR9nqL1742q6zbctb3nyQrNrDnP\nSpdT2viDfuj6WKRarMjnyTvmIK1cHm9gNhj8RJFTWekeOqFIFtQyP0/a00xauTzewGww5HqikPTu\nrPfrRlgPpbLSPXRCkWqxsj7PBeu2clXNfNi1TzNZYzDqe0r1S9A1sz3yVj0dnHw/GngRlXQbAKcB\n3yqrUP2ujHQPnVC0Wqzdz7Nu89ReQaKq+jSTVq60nlL9eG7NRlmuQBER7wWQ9FXghIh4OFm+CGie\nZc46qlejk9fceHfjFL9UniYuO3Nxarn6Neia2R5FG7MPA56oWX6ClHkjrHt6VS2WVbV1+OREX1fX\nmVlzRQPFlcC3JF2fLK+gMhud9Vgv7tDTqpbEnrmr/eRgNrgK9XqKiEuA3wceSr5+PyL+T5kFs8HR\nqOeUgLe89KiGqUA8p7TZYCn0RJHMJXEscEhEXCzpKEkvjoiRadC2PVqtWhqW9Cdmo6ZoUsC/B3YB\nyyLieZLmATdFRGnzaOfR7aSAvVAd2Tw1PcOYxM6I3b2G2r3IdivJ4TClPzEbBp1OCviSiDhB0maA\niHhI0tyC+7Im6u/Eq7mZyrgjX7d5ipXX3LY7w+vU9Awrr7mtrX2m6ff0J2bWWNGR2bOSxkiSgUqa\nT+UJwzoga/a4dnMjXXTDtn3SgM/uCi66YVvhfabph1nozCy/ok8UHwauB54m6RLgDOCvSivVkMtb\n1dPsjjvvHXnt8dMqHqdnis1lnSVrnMcwzPFhNqwKBYqIuErSJipTlgpYERF3llqyIVVm4r7a91s9\n9kU3bCs9CLR6kU9r9AbcyG3Wx4r2enp/RLwHuKvBOsuQlbgv7aKYNskQpI+8bmXuhyzzDhxvabu8\nga/ReIqlq9fnPidm1j1Fq55OAeqDwqsbrBtJWXfY7Sbuq+/1dPIx81lz492cu3ZL5h16o1xMacbH\nxIWnLWpp2yKBr54buc36W97ssX8C/CnwbEm317x1MPCNMgs2qJrdYZeZuC/tWAeMz9nn4t0sSIxJ\n7IrI3T5QxkW+n+f4MLP8vZ4+QyVT7OeT79WvF0bEW0ou20BqNidEmfM/pB3roUfztUF
MjI/xgTcd\nzz2rT+WWVctyVfeU0ZOpn+f4MLOcgSIifhkRP6SSBPCXEXFvRNwLhKRPdqKAg6bZHXazqUTzpLgo\no2rmoLntzVFdxkW+6PSqZtYdRdsoXhAR09WFZMDdkpLKNNBaqUZJS5CXt2E47ViTE+P8+okdzO5s\n3ioxeeDcti7IZWWGddJAs/5VdMDdnCRtBwCSDsXzbwPt3WHnnco07VgXvW4RB81t7deR56kk7Wln\nxZIF3LJqWaGqKzPrf0Uv7h8A/lPSNcnyG4FLyinSYGvnDjtvw3DWsc5du6Wl8uYZg+GxDmajqeiA\nuyuTAXcnJ6tOj4jvlFeswVa0GqVI75+0YzUbpAf52hLK6AZrZoOpaNUTEbEtIj6SfA18kOiHeRLK\n7P2zcvnRjM9R6vt5G4w91sFsdOUdR/H1iDhJ0sPs3TVfQETEk0stXZf0S7VKmVOGVn+mNmXHvAPH\nufC0RV172jGz4VBoPop+0+58FKMwT0K7SffqgylUnnbcjdVscHVkPgpJ7856PyI+mGd//WLYq1XK\neGIq82nHzAZL3sbsg5PvRwMvAm5Ilk8DBnYa1GGvVimrIdpjHcxGU96R2e+NiPcCRwAnRMR5EXEe\n8ELgqE4UsBuGPYXEsD8xmVlnFe31dBiVNB5VTyTrcpH0SUkPSrqjZt2hkr4s6bvJ93lZ+yjDsKeQ\n8MxyZtaOogPurgS+Jen6ZHkFcEWB/fwz8JFkf1WrgK9ExGpJq5LljqcvH+ZqlayZ5czMmik64O4S\nSf8O/Fay6vcjYnOB/XxV0sK61a8HXp68vgK4Gc9z0RY3RJtZO4rOcCfgWOCQiLhY0lGSXhwRZTRo\nHxYRP0le/5QCVVq2r24+MXn+a7PhUrSN4mPAy4CzkuWHgY+WUqIaURnk0XCgh6R3SNooaeP27dvL\nPrQVVO2KOzU9Q7CnK24vRrqbWTmKBoqXRMQ7gcegkmYcmFtSmR6Q9HSA5PuDjTaKiMsj4sSIOHH+\n/PklHdralTcDrpn1v6KBYlbSGMndvqT5wK6SynQDcHby+mwqs+nZgHBXXLPhU7TX04eB64GnSboE\nOAO4IO9OJP0LlYbrp0r6MXAhsBq4WtLbgXuBNxUs41Dq9/r/YR+8aDaKcgeKpCH7q8Am4BVUEgKu\niIg78+4rIs5KeesVefc1CvoleWEWd8U1Gz65A0VEhKQvRsRxwF0dKFPf69VdfZ5UHL0qo7vimg2f\nolVP35b0ooj4r1JLMwB6eVffav1/r588hnnwotkoKtzrCbhV0vcl3S5pq6TbyyxYv+plr55WU3G4\n55GZlanoE8XyUksxQHrZq6fV+n/3PDKzMuWdj+IA4I+B5wBbgU9ExI5OFKxf9bJXT6v1/+55ZGZl\nyvtEcQUwC3wNeDWVNB7vKrtu6ZQDAAAJq0lEQVRQ/azVu/pONSa3Uv/vnkdmVqa8geLYpLcTkj7B\nAE9WVFQrd/X90JjcrIxmZq3KNWe2pG9HxAlpy73S7pzZZRuFObjNbPB1ZM5s4HhJv6oeA5hIlkVl\niMWTc+6vZzo5zsCNyWY2THIFiogYa75V/+t01ZAbk81smBQdRzHQOj3OYNjn4Daz0VJ0HMVAK6tq\nKK36yo3JZjZMRjJQlFE11Kz6ymkszGxYjGTVUxlVQ06TYWajYiSfKMqoGnLPJjMbFSMZKKD9DKfu\n2WRmo2Ikq57K4J5NZjYqRvaJol3u2WRmo8KBog3u2WRmo8BVT2ZmlsmBwszMMjlQmJlZJrdRlKiT\nGWnNzHrFgaIkvZ6syMysU1z1VBKn9DCzYeVAURKn9DCzYeVAUZK01B1O6WFmg86BoiRO6WFmw8qN\n2SVxSg8zG1YOFCVySg8zG0auejIzs0wOFGZmlsmBwszMMjlQmJlZJgcKMzPL1JeBQtKrJN0t6XuS\nVvW6PGZmo6zvAoWkMeCjwKuBY4GzJB3b21KZmY2
uvgsUwIuB70XEDyLiCeCzwOt7XCYzs5HVj4Fi\nAfCjmuUfJ+v2IukdkjZK2rh9+/auFc7MbNT0Y6BoSURcHhEnRsSJ8+fP73VxzMyGVj8GiingyJrl\nI5J1ZmbWA/0YKP4LeK6kZ0qaC7wZuKHHZTIzG1l9lxQwInZI+jPgRmAM+GREbOtxsczMRlbfBQqA\niPgi8MVel8PMzPqz6snMzPqIA4WZmWVyoDAzs0wOFGZmlsmBwszMMjlQmJlZJgcKMzPL5EBhZmaZ\nHCjMzCyTA4WZmWVyoDAzs0wOFGZmlqkvkwL2s3Wbp1hz493cPz3D4ZMTrFx+NCuW7DMBn5nZ0HCg\nyGHd5inOv24rM7M7AZianuH867YCOFiY2dBy1VMOa268e3eQqJqZ3cmaG+/uUYnMzDrPgSKH+6dn\ncq03MxsGDhQ5HD45kWu9mdkwcKDIYeXyo5kYH9tr3cT4GCuXH92jEpmZdZ4bs3OoNli715OZjRIH\nipxWLFngwGBmI8VVT2ZmlsmBwszMMjlQmJlZJgcKMzPL5EBhZmaZFBG9LkPbJG0H7u11Odr0VOBn\nvS5EH/H52MPnYm8+H3tr53w8IyLmN9toKALFMJC0MSJO7HU5+oXPxx4+F3vz+dhbN86Hq57MzCyT\nA4WZmWVyoOgfl/e6AH3G52MPn4u9+XzsrePnw20UZmaWyU8UZmaWyYHCzMwyOVD0gKRPSnpQ0h01\n6w6V9GVJ302+z+tlGbtF0pGSNkj6jqRtkt6VrB/V83GApG9Jui05H+9N1j9T0jclfU/SWklze13W\nbpE0JmmzpH9Llkf5XPxQ0lZJWyRtTNZ1/H/FgaI3/hl4Vd26VcBXIuK5wFeS5VGwAzgvIo4FXgq8\nU9KxjO75eBxYFhHHA4uBV0l6KfB+4LKIeA7wEPD2Hpax294F3FmzPMrnAuDkiFhcM3ai4/8rDhQ9\nEBFfBX5Rt/r1wBXJ6yuAFV0tVI9ExE8i4tvJ64epXBAWMLrnIyLikWRxPPkKYBnwuWT9yJwPSUcA\npwL/mCyLET0XGTr+v+JA0T8Oi4ifJK9/ChzWy8L0gqSFwBLgm4zw+UiqWrYADwJfBr4PTEfEjmST\nH1MJpqPgQ8BfAruS5acwuucCKjcNN0naJOkdybqO/694hrs+FBEhaaT6LUt6EnAtcE5E/Kpy41gx\naucjInYCiyVNAtcDx/S4SD0h6bXAgxGxSdLLe12ePnFSRExJehrwZUl31b7Zqf8VP1H0jwckPR0g\n+f5gj8vTNZLGqQSJqyLiumT1yJ6PqoiYBjYALwMmJVVv7I4ApnpWsO5ZCrxO0g+Bz1Kpcvo7RvNc\nABARU8n3B6ncRLyYLvyvOFD0jxuAs5PXZwOf72FZuiapc/4EcGdEfLDmrVE9H/OTJwkkTQCnUGm3\n2QCckWw2EucjIs6PiCMiYiHwZmB9RLyFETwXAJIOknRw9TXwO8AddOF/xSOze0DSvwAvp5Ie+AHg\nQmAdcDVwFJWU6W+KiPoG76Ej6STga8BW9tRD/y8q7RSjeD5eQKVBcozKjdzVEXGxpGdRuas+FNgM\nvDUiHu9dSbsrqXr6i4h47aiei+RzX58s7gd8JiIukfQUOvy/4kBhZmaZXPVkZmaZHCjMzCyTA4WZ\nmWVyoDAzs0wOFGZmlsmBwszMMjlQ2NCQtEJSSMpMeSFpUtKftnmsR1LW70xSQN8h6RpJB6Zs9412\njt8qSc9LUlPPSZbHJN0k6fe6cXwbDg4UNkzOAr6efM8yCbQVKDLMJCmgnw88Afxx7ZuqmBMRv9mh\n4+8lIu6kMrL7tcmqS4C7I+LKbhzfhoMDhQ2FJKngSVTmJnhzzfrfk3R7MhHQp5LVq4FnJ3f+ayQt\nrJtE6i8kXZS8Xpdk6txWk62zVV8DnpPs/25JV1JJuXBk7RNJShmR9NZkEqMtkv4heRo4SNIXkm3v\nkHRmC+W4DPg
TSW+gkj/p3Tk/h404Z4+1YfF64EsR8d+Sfi7phcBjwAXAb0bEzyQdmmy7Cnh+RCyG\n3enN0/xBRPwiybv0X5KujYifNytMkrTu1cCXklXPBc6OiFuT96vbLWpURknPA84ElkbErKSPAW8B\nfg3cHxGnJtsdknz/IvCHEXF/fVki4iZJHwAuBX47Imabld+slp8obFicRSX/D8n3s6hkG70mIn4G\nUDD/zZ9Lug24FTiSygU/y0Qyl8RG4D4qCQ8B7q0GiTppZXwF8EIqwWlLsvwsKjmxTpH0fkm/FRG/\nTH7uNY2CRI1vAB+MiJ9WV0j6myafxQzwE4UNgeQufBlwXJKLf4zKBC9rWtzFDva+aTog2e/LgVcC\nL4uIRyXdXH0vw0z1SaWmfFB5EshDwBURcf4+b0gnAK8B3ifpKxFxcQv7Oxb4p5p9/AaV2fPMmvIT\nhQ2DM4BPRcQzImJhRBwJ3APcDrwxya5JTdXTw8DBNT//APA0SU+RtD97Gn4PAR5KgsQxVOb0Ltv6\nlDJ+BTgjmaAGSYdKeoakw4FHI+LTVALhCS0eZxGV9pGqxcCWMj6ADT8HChsGZ7En/XLVtVQatS8B\n/l9SffRBgKSN4ZakMXhNUmd/MfAtKlOPVmcN+xKwn6Q7qTSAN6o6aktEbEsp43eotF3cJOn2pFxP\nB44DvpVUR10IvA8qbRRJENmHpCOpTB9a26XXgcJa5jTjZiNI0ieAP4qIXU03tpHnQGFmZplc9WRm\nZpkcKMzMLJMDhZmZZXKgMDOzTA4UZmaWyYHCzMwyOVCYmVkmBwozM8vkQGFmZpn+P0oQ58T6BoBj\nAAAAAElFTkSuQmCC\n",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "2X1RA6sgtZQ6",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Conclusion\n",
-        "- looks like the multiple regression we ran does provide more accurate predictions than the simple linear regression\n",
-        "  - this will not always be the case, so always be sure to check and confirm if the extra computing is worth it\n",
-        "\n",
-        "Anyways, that's how you implement both Simple and Multiple Linear Regression with `cuML`. Go forth and do great things. Thanks for stopping by!"
-      ]
-    }
-  ]
-}

From 533f9238b768442f154a4cf338dec04ecf7fde28 Mon Sep 17 00:00:00 2001
From: Winston Robson <43570913+gumdropsteve@users.noreply.github.com>
Date: Thu, 26 Mar 2020 14:34:51 -0700
Subject: [PATCH 7/7] [WIP] Zestimate (#2)

* running locally; general update to allow current breaks to be more understandable

* making issues easier to understand; working on flow of notebook
---
 .../zillow_kaggle_zestimate_comp.ipynb        | 7756 +++++++++--------
 1 file changed, 4026 insertions(+), 3730 deletions(-)

diff --git a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
index c8d68291..4dfce9de 100644
--- a/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
+++ b/colab_notebooks/zillow_kaggle_zestimate_comp.ipynb
@@ -1,3734 +1,4030 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "scfLT2i0MLyD"
+   },
+   "source": [
+    "# Environment Sanity Check #\n",
+    "\n",
+    "Click the _Runtime_ dropdown at the top of the page, then _Change Runtime Type_ and confirm the instance type is _GPU_.\n",
+    "\n",
+    "Check the output of `!nvidia-smi` to make sure you've been allocated a Tesla T4.\n",
+    "\n",
+    "# Setup:\n",
+    "\n",
+    "1. Install the most recent Miniconda release compatible with Google Colab's Python install (3.6.7)\n",
+    "2. Install RAPIDS libraries\n",
+    "3. Set necessary environment variables\n",
+    "4. Copy RAPIDS .so files into current working directory, a workaround for conda/colab interactions\n",
+    "- **TLDR**\n",
+    "  - Hit `Shift` + `Enter`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
     "colab": {
-      "name": "zillow_kaggle_zestimate_comp.ipynb",
-      "version": "0.3.2",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "accelerator": "GPU"
+     "base_uri": "https://localhost:8080/",
+     "height": 312
+    },
+    "colab_type": "code",
+    "id": "W-um5d-x7o46",
+    "outputId": "a604e66b-95d7-44fb-f8d3-848fcedaf796"
+   },
+   "outputs": [],
+   "source": [
+    "\"\"\"make sure we have the right GPU\n",
+    "> column 1 row 3 == Tesla T4\n",
+    "\"\"\"\n",
+    "# display gpu specs\n",
+    "!nvidia-smi"
+   ]
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "scfLT2i0MLyD",
-        "colab_type": "text"
-      },
-      "source": [
-        "# Environment Sanity Check #\n",
-        "\n",
-        "Click the _Runtime_ dropdown at the top of the page, then _Change Runtime Type_ and confirm the instance type is _GPU_.\n",
-        "\n",
-        "Check the output of `!nvidia-smi` to make sure you've been allocated a Tesla T4.\n",
-        "\n",
-        "#Setup:\n",
-        "\n",
-        "1. Install most recent Miniconda release compatible with Google Colab's Python install  (3.6.7)\n",
-        "2. Install RAPIDS libraries\n",
-        "3. Set necessary environment variables\n",
-        "4. Copy RAPIDS .so files into current working directory, a workaround for conda/colab interactions\n",
-        "- **TLDR**\n",
-        "  - Hit `Shift` + `Enter`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "W-um5d-x7o46",
-        "colab_type": "code",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 312
-        },
-        "outputId": "a604e66b-95d7-44fb-f8d3-848fcedaf796"
-      },
-      "source": [
-        "\"\"\"make sure we have the right GPU\n",
-        "> column 1 row 3 == Tesla T4\n",
-        "\"\"\"\n",
-        "# display gpu specs\n",
-        "!nvidia-smi"
-      ],
-      "execution_count": 1,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "Mon Sep  9 14:17:51 2019       \n",
-            "+-----------------------------------------------------------------------------+\n",
-            "| NVIDIA-SMI 430.40       Driver Version: 418.67       CUDA Version: 10.1     |\n",
-            "|-------------------------------+----------------------+----------------------+\n",
-            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
-            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
-            "|===============================+======================+======================|\n",
-            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
-            "| N/A   68C    P0    28W /  70W |      0MiB / 15079MiB |      0%      Default |\n",
-            "+-------------------------------+----------------------+----------------------+\n",
-            "                                                                               \n",
-            "+-----------------------------------------------------------------------------+\n",
-            "| Processes:                                                       GPU Memory |\n",
-            "|  GPU       PID   Type   Process name                             Usage      |\n",
-            "|=============================================================================|\n",
-            "|  No running processes found                                                 |\n",
-            "+-----------------------------------------------------------------------------+\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "kkEdr1VmigyU",
-        "colab_type": "text"
-      },
-      "source": [
-        "### Install RAPIDS AI"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "p129YxxnihcV",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "!wget -nc https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
-        "# RAPIDS 0.10 nightly\n",
-        "!bash rapids-colab.sh \n",
-        "\n",
-        "import sys, os\n",
-        "\n",
-        "sys.path.append('/usr/local/lib/python3.6/site-packages/')\n",
-        "os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'\n",
-        "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "1CsdVW7SU9Li",
-        "colab_type": "text"
-      },
-      "source": [
-        "# Zillow Kaggle Competition RAPIDS Conversion\n",
-        "- initially based off eswar3's [Zillow prediction models]( https://github.com/eswar3/Zillow-prediction-models) repo\n",
-        "## Download Data\n",
-        "- to download the data, please plug in your kaggle api username & key\n",
-        "  - you can set up your kaggle api at `https://www.kaggle.com/YOUR USERNAME HERE/account`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "x1dLRTm168Tk",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# 5b4ecdb3cb122fb692a8349124960424\n",
-        "# Info on how to get your api key (kaggle.json) here: https://github.com/Kaggle/kaggle-api#api-credentials\n",
-        "!pip install kaggle\n",
-        "!mkdir /root/.kaggle\n",
-        "# plug api -- get your own API key\n",
-        "!echo '{\"username\":\"warobson\",\"key\":\"\"}' > /root/.kaggle/kaggle.json\n",
-        "!chmod 600 /root/.kaggle/kaggle.json\n",
-        "# !kaggle datasets download\n",
-        "!kaggle competitions download -c zillow-prize-1\n",
-        "\n",
-        "# unzip kaggle data\n",
-        "!unzip -q \"/content/sample_submission.csv.zip\"\n",
-        "!unzip -q \"/content/train_2016_v2.csv.zip\"\n",
-        "!unzip -q \"/content/properties_2016.csv.zip\"\n",
-        "!unzip -q \"/content/train_2017.csv.zip\"\n",
-        "!unzip -q \"/content/properties_2017.csv.zip\""
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "LICr9uz8do9K",
-        "colab_type": "text"
-      },
-      "source": [
-        "#### How is the data saved?\n",
-        "- inside content directory "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "6n75DyJ-dm4B",
-        "colab_type": "code",
-        "outputId": "64ac687e-39d6-4bb1-f4b7-5476c9de3b84",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 173
-        }
-      },
-      "source": [
-        "# display content folder contents\n",
-        "!ls \"/content/\""
-      ],
-      "execution_count": 4,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0.9\t\t\t\t  sample_data\n",
-            "env-check.py\t\t\t  sample_submission.csv\n",
-            "__MACOSX\t\t\t  sample_submission.csv.zip\n",
-            "Miniconda3-4.5.4-Linux-x86_64.sh  train_2016_v2.csv\n",
-            "properties_2016.csv\t\t  train_2016_v2.csv.zip\n",
-            "properties_2016.csv.zip\t\t  train_2017.csv\n",
-            "properties_2017.csv\t\t  train_2017.csv.zip\n",
-            "properties_2017.csv.zip\t\t  zillow_data_dictionary.xlsx.zip\n",
-            "rapids-colab.sh\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Lpa1b4edIXuT",
-        "colab_type": "text"
-      },
-      "source": [
-        "# Imports\n",
-        "### RAPIDS\n",
-        "* `cuDf`\n",
-        "  - words here\n",
-        "* `cuML`\n",
-        "  - words here\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ZKN5zuROroJD",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# rapids \n",
-        "import cudf, cuml \n",
-        "# switch to cupy next update (once docker has it)\n",
-        "import numpy as np\n",
-        "# general \n",
-        "import seaborn as sns\n",
-        "import matplotlib.pyplot as plt"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "YJeywzd2efw7",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Data\n",
-        "* `properties_2016`\n",
-        "  - aprox. 27,000,000 residential properties \n",
-        "  - 58 attributes each\n",
-        "* `train_2016_v2`\n",
-        "  - 90,000 transaction records for closings in the year 2016\n",
-        "    * Merge datasets on `property_id`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "2EfApIzCfEtr",
-        "colab_type": "code",
-        "outputId": "bc1e37d1-9ab8-4561-fa39-5af420480a72",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 156
-        }
-      },
-      "source": [
-        "# import 2016 properties\n",
-        "prop2016 = cudf.read_csv('/content/properties_2016.csv')\n",
-        "# peek display 2016 properties\n",
-        "print(prop2016.head())"
-      ],
-      "execution_count": 154,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "   parcelid airconditioningtypeid  ... taxdelinquencyyear censustractandblock\n",
-            "0  10754147                  null  ...               null                null\n",
-            "1  10759547                  null  ...               null                null\n",
-            "2  10843547                  null  ...               null                null\n",
-            "3  10859147                  null  ...               null                null\n",
-            "4  10879947                  null  ...               null                null\n",
-            "\n",
-            "[5 rows x 58 columns]\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "uynoUxpx8Xsn",
-        "colab_type": "code",
-        "outputId": "b64b7b32-c1f9-4cf3-c50d-36e90dc51a64",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 121
-        }
-      },
-      "source": [
-        "# import train 2016  data\n",
-        "train2016 = cudf.read_csv('/content/train_2016_v2.csv',\n",
-        "                          parse_dates=[\"transactiondate\"])\n",
-        "# peek display 2016 train\n",
-        "print(train2016.head())"
-      ],
-      "execution_count": 155,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "   parcelid  logerror transactiondate\n",
-            "0  11016594    0.0276      2016-01-01\n",
-            "1  14366692   -0.1684      2016-01-01\n",
-            "2  12098116   -0.0040      2016-01-01\n",
-            "3  12643413    0.0218      2016-01-02\n",
-            "4  14432541   -0.0050      2016-01-02\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "gGiscxESJDrl",
-        "colab_type": "text"
-      },
-      "source": [
-        "## [Zillow Prediction Model](https://github.com/eswar3/Zillow-prediction-models/blob/master/Step%202a-Approach1.ipynb)\n",
-        "\n",
-        "    In this approach the properties data and transaction data are merged together before adressing any missing values\n",
-        "\n",
-        "\n",
-        "#### Merging Data \n",
-        " - we will start by merging the two dataframes\n",
-        "  - then rename the new dataframe's attributes to be meaningful \n",
-        "    - e.g. from `pooltypeid7` to `pool_with_spa_tub_no` and `structuretaxvaluedollarcnt` to `structure_tax`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "o4CvSIcwm4B2",
-        "colab_type": "code",
-        "outputId": "4e59a51a-ebd6-4fe5-b037-3165e57e3b85",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 156
-        }
-      },
-      "source": [
-        "# merge 2016 train and property dataframes by parcel id\n",
-        "train = train2016.merge(prop2016, how='left', on='parcelid')\n",
-        "\n",
-        "# work on a copy\n",
-        "df_train = train.copy()  # [:int(0.5*len(train))]\n",
-        "\n",
-        "# add column inidcaticating month of transaction\n",
-        "df_train['transaction_month'] = df_train['transactiondate'].dt.month\n",
-        "\n",
-        "# set colums to be renamed for general english understandability \n",
-        "rename_these = {\"bathroomcnt\": \"total_bath\",\n",
-        "                \"fullbathcnt\": \"full_bath\",\n",
-        "                \"threequarterbathnbr\": \"half_bath\",\n",
-        "                \"yardbuildingsqft17\": \"patio_sqft\",\n",
-        "                \"yardbuildingsqft26\":\"storage_sqft\",\n",
-        "                \"decktypeid\": \"deck_flag\",\n",
-        "                \"pooltypeid7\": \"pool_with_spa_tub_no\", \n",
-        "                \"pooltypeid2\": \"pool_with_spa_tub_yes\",\n",
-        "                \"hashottuborspa\": \"has_hottub_or_spa\", \n",
-        "                \"pooltypeid10\": \"just_hottub_or_spa\",\n",
-        "                \"calculatedfinishedsquarefeet\":\"total_finished_living_area_sqft\", \n",
-        "                \"finishedsquarefeet12\": \"finished_living_area_sqft\",\n",
-        "                \"lotsizesquarefeet\": \"lot_area_sqft\",\n",
-        "                \"finishedsquarefeet50\":\"finished_living_area_entryfloor_sqft1\",\n",
-        "                \"finishedfloor1squarefeet\":\"finished_living_area_entryfloor_sqft2\",\n",
-        "                \"finishedsquarefeet6\": \"base_unfinished_and_finished_area_sqft\",\n",
-        "                \"finishedsquarefeet15\": \"total_area_sqft\",\n",
-        "                \"finishedsquarefeet13\": \"preimeter_living_area_sqft\",\n",
-        "                \"taxvaluedollarcnt\":\"total_parcel_tax\",\n",
-        "                \"landtaxvaluedollarcnt\":\"land_tax\",\n",
-        "                \"taxamount\":\"total_property_tax_2016\",\n",
-        "                \"structuretaxvaluedollarcnt\":\"structure_tax\",\n",
-        "                \"garagetotalsqft\":\"garage_sqft\",\n",
-        "                \"fireplacecnt\":\"fireplace_count\",\n",
-        "                \"buildingqualitytypeid \":\"building_quality_id\",\n",
-        "                \"heatingorsystemtypeid\":\"heating_system_id\",\n",
-        "                \"airconditioningtypeid\":\"ac_id\",\n",
-        "                \"storytypeid\": \"basement_flag\",\n",
-        "                \"basementsqft\": \"basement_sqft\",\n",
-        "                \"poolsizesum\": \"pool_sqft\",\n",
-        "                \"poolcnt\": \"pool_count\"}\n",
-        "# rename columns \n",
-        "df_train = df_train.rename(columns = rename_these)\n",
-        "\n",
-        "# what's the data frame look like?\n",
-        "print(df_train.head())"
-      ],
-      "execution_count": 156,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "   parcelid  logerror  ... censustractandblock transaction_month\n",
-            "0  11827818    0.0402  ...        6.037532e+13                 3\n",
-            "1  12123024    0.0296  ...        6.037463e+13                 3\n",
-            "2  13867327    0.0344  ...        6.059011e+13                 3\n",
-            "3  12681894    0.0060  ...        6.037651e+13                 3\n",
-            "4  12848541    0.0695  ...        6.037409e+13                 3\n",
-            "\n",
-            "[5 rows x 61 columns]\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "YdtyBI2jFnJv",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Conforming Attribute Values\n",
-        "### #0 boolean columns & null = 0s cases \n",
-        "* `pool_count`, `pool_with_spa_tub_no` and `pool_with_spa_tub_yes` are all binary variables, replace all NULL values with zero\n",
-        "*   `basement_flag` has values 7 & `Null` but is supposed to be bool, convert the `7`s to `1`s and the `Null`s to `0`s \n",
-        "* patio and shed variables with null values are assumed to have none\n",
-        "* deck_flag has only 2 values, `66` and `null`\n",
-        "  - convert it into binary flag\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "z3bPdNONHTYI",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# replace missing pool count values so we booling\n",
-        "the_bool_club = ['pool_count','pool_with_spa_tub_no','pool_with_spa_tub_yes',\n",
-        "                 'basement_flag','patio_sqft','storage_sqft', 'deck_flag']\n",
-        "for col in the_bool_club:\n",
-        "  # convert null values to 0\n",
-        "  df_train[col]=df_train[col].fillna(0)\n",
-        "# convert 7s and 66s to 1s\n",
-        "df_train['basement_flag'] = df_train['basement_flag'].replace(7, 1)\n",
-        "df_train['deck_flag'] = df_train['deck_flag'].replace(66, 1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "5MbGy6r7JLLD",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #1 The pool\n",
-        "*   When pool is present and if it has tub/spa then `just_hottub_or_spa` = 0"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "B3-1V93smA9A",
-        "colab_type": "code",
-        "outputId": "52e1a5d7-869a-443f-ac2d-40504992dc14",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 156
-        }
-      },
-      "source": [
-        "print(f'before\\n{df_train.just_hottub_or_spa.value_counts()}\\n')\n",
-        "\n",
-        "# if poolcnt=1 and has_hottub_or_spa=1 and just_hottub_or_spa is null\n",
-        "conditions = ((df_train['pool_count'] == 1) \n",
-        "              & (df_train['has_hottub_or_spa'] == 1) \n",
-        "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
-        "# then just_hottub_or_spa = 0\n",
-        "df_train.just_hottub_or_spa.loc[conditions] = 0\n",
-        "\n",
-        "print(f'after\\n{df_train.just_hottub_or_spa.value_counts()}')\n"
-      ],
-      "execution_count": 158,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "before\n",
-            "1.0    1161\n",
-            "Name: just_hottub_or_spa, dtype: int32\n",
-            "\n",
-            "after\n",
-            "0.0    1204\n",
-            "1.0    1161\n",
-            "Name: just_hottub_or_spa, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "v6E3-_XlSGBs",
-        "colab_type": "text"
-      },
-      "source": [
-        "\n",
-        "- when `has_hottub_or_spa` is null and `just_hottub_or_spa` is null\n",
-        "  - both should be zero\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Xa12WFccSGM6",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# if both has hottub and just hottub are null\n",
-        "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
-        "              & (df_train['just_hottub_or_spa'].isna() == True))\n",
-        "# just hottub or spa = 0 \n",
-        "df_train.just_hottub_or_spa.loc[conditions] = 0\n",
-        "\n",
-        "# now, if has hottub is null and just hottub is 0 \n",
-        "conditions = ((df_train['has_hottub_or_spa'].isna() == True) \n",
-        "              & (df_train['just_hottub_or_spa'] == 0))\n",
-        "# has hottub or spa = 0 \n",
-        "df_train.has_hottub_or_spa.loc[conditions] = 0"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "5umCCWN73qxw",
-        "colab_type": "text"
-      },
-      "source": [
-        "- when there is no pool\n",
-        "  - if there is tub/spa \n",
-        "    - then `just_hottub_or_spa`  = 1"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "FBgs7zJm3qk-",
-        "colab_type": "code",
-        "outputId": "78c76ac5-2b7f-4f98-9615-8a335bc3214e",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 69
-        }
-      },
-      "source": [
-        "# when poolcnt=0, has_hottub_or_spa=1\n",
-        "conditions = ((df_train['pool_count'] == 0) \n",
-        "              & (df_train['has_hottub_or_spa'] == 1))\n",
-        "# just_hottub_or_spa=1\n",
-        "df_train.just_hottub_or_spa.loc[conditions] = 1\n",
-        "\n",
-        "print(df_train.just_hottub_or_spa.value_counts())"
-      ],
-      "execution_count": 160,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0.0    89114\n",
-            "1.0     1161\n",
-            "Name: just_hottub_or_spa, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "3LsRr1aoSCVx",
-        "colab_type": "text"
-      },
-      "source": [
-        "*   When there is no pool, set pool size to zero instead of na"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "NtdyXCbx0TKx",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# where there is no pool\n",
-        "conditions = df_train['pool_count']==0\n",
-        "# square footage of non existant pool is 0 \n",
-        "df_train.pool_sqft.loc[conditions] = 0"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "3hQFkXmAgQPY",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #2 The basement\n",
-        "*    Where `basement_flag` is zero, `basement_sqft` should also be zero\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "kMuCOqAmLTmY",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# where there is no basement\n",
-        "conditions = df_train['basement_flag'] == 0\n",
-        "# fun fact: we just did this with the pool\n",
-        "df_train.basement_sqft.loc[conditions] = 0"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "wU6Uohb-PDYB",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #3 The fireplace\n",
-        "There seems to be an inconsistency between `fireplaceflag` and `fireplace_count`\n",
-        "- 90,053 flag values are null\n",
-        "- 80,668 `fireplace_count` values are null\n",
-        "    * 9,385 (-11.5%) difference, but a boatload either way"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "OZM6lXmmpj5k",
-        "colab_type": "code",
-        "outputId": "ecf62d1d-b036-41ad-8052-a3090ae590ef",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 52
-        }
-      },
-      "source": [
-        "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
-        "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
-      ],
-      "execution_count": 163,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "there are 80668 fireplace_count nulls\n",
-            "there are 90053 fireplaceflag nulls\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "v9ZAzFoIpkSF",
-        "colab_type": "text"
-      },
-      "source": [
-        "* context driven solutions\n",
-        "  * where neither flag nor count exists, `fireplaceflag == False`\n",
-        "  *   when `fireplace_count` is more than zero `fireplaceflag` should be `True`\n",
-        "  * if `fireplaceflag == False`, the `fireplace_count` is logically `0`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "i3YRZgU_qZhA",
-        "colab_type": "code",
-        "outputId": "e45a7a96-2e1d-47d2-a0bd-48ece42cbb6e",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 52
-        }
-      },
-      "source": [
-        "# null flags with null counts are zero\n",
-        "conditions = ((df_train['fireplace_count'].isna()==True) \n",
-        "              & (df_train['fireplaceflag'].isna()==True))\n",
-        "df_train.fireplaceflag.loc[conditions] = False\n",
-        "\n",
-        "# true flags for positive fireplace counts\n",
-        "conditions = df_train['fireplace_count'] > 0\n",
-        "df_train.fireplaceflag.loc[conditions] = True\n",
-        "\n",
-        "# set fireplace count nulls to 0 where false flags are\n",
-        "conditions = ((df_train['fireplace_count'].isna()==True) \n",
-        "              & (df_train['fireplaceflag']==False))\n",
-        "df_train.fireplace_count.loc[conditions] = 0\n",
-        "\n",
-        "print(f\"there are {df_train['fireplace_count'].isna().sum()} fireplace_count \\\n",
-        "nulls\\nthere are {df_train['fireplaceflag'].isna().sum()} fireplaceflag nulls\")"
-      ],
-      "execution_count": 164,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "there are 222 fireplace_count nulls\n",
-            "there are 0 fireplaceflag nulls\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "colab_type": "text",
-        "id": "pYntUejosOn3"
-      },
-      "source": [
-        "### #4 The garage\n",
-        "*   Properties with no garages would have NA values for both `garagecarcnt` and `garage_sqft`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "L9mGs-mK9E0Q",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "garage = ['garagecarcnt', 'garage_sqft']\n",
-        "# where garage car count and garage square feet are null\n",
-        "conditions = ((df_train['garagecarcnt'].isna()==True) \n",
-        "              & (df_train['garage_sqft'].isna()==True))\n",
-        "# set both to 0\n",
-        "df_train[garage].loc[conditions] = 0"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "0uV115W6-ohW",
-        "colab_type": "text"
-      },
-      "source": [
-        "Exploring the data further, we see\n",
-        "- `garage_sqft` holds over 8,900 measurements of 0 despite the garage's car count being 1 or more  \n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "gbbUIbwJ-ouS",
-        "colab_type": "code",
-        "outputId": "310a4cdf-01a0-4fc3-ed1b-0e2f5e668518",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 121
-        }
-      },
-      "source": [
-        "# show rows where garage count and square feet don't add up\n",
-        "conditions = (df_train.garagecarcnt > 0) & (df_train.garage_sqft == 0)\n",
-        "print(df_train.loc[conditions][garage].head())"
-      ],
-      "execution_count": 166,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "    garagecarcnt  garage_sqft\n",
-            "16           2.0          0.0\n",
-            "29           1.0          0.0\n",
-            "32           2.0          0.0\n",
-            "49           1.0          0.0\n",
-            "52           2.0          0.0\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "5I1O76QKA8Cb",
-        "colab_type": "text"
-      },
-      "source": [
-        "- these 0 values need to be null\n",
-        " - because no garage holding 1 or more cars in 2016 measured 0sqft"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "eWVtoty0A9Jt",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# where garage count and square feet don't add up\n",
-        "conditions = (df_train.garagecarcnt>0) & (df_train.garage_sqft==0)\n",
-        "# insert a NaN value\n",
-        "df_train.garage_sqft.loc[conditions] = np.nan"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "colab_type": "text",
-        "id": "seb6r5wx5Bbz"
-      },
-      "source": [
-        "### #5 The bath\n",
-        "*   `total_bath` & `calculatedbathnbr` are near-duplicates w/ `calculated` having more nulls\n",
-        "  - let's drop it\n",
-        "*   if `full_bath` is null and `half_bath` is also null\n",
-        "  - let's make `total_bath` = 0 \n",
-        "      - because we can't truthfully assume it's any more "
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "colab_type": "code",
-        "id": "EgMNToed5BMu",
-        "colab": {}
-      },
-      "source": [
-        "# drop calculated bath column\n",
-        "df_train = df_train.drop('calculatedbathnbr', axis=1)\n",
-        "\n",
-        "# if full_bath is null & half_bath is null\n",
-        "conditions = ((df_train['full_bath'].isnull()==True) \n",
-        "              & (df_train['half_bath'].isnull()==True) \n",
-        "              & (df_train['total_bath']==0))\n",
-        "# total_bath=0\n",
-        "df_train.total_bath.loc[conditions] = np.nan\n",
-        "\n",
-        "# when full_bath==total_bath, half_bath=0 \n",
-        "df_train.half_bath.loc[df_train.full_bath == df_train.total_bath] = 0"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "colab_type": "text",
-        "id": "Sh8cG0pr4_hl"
-      },
-      "source": [
-        "### #6 Mode Imputation \n",
-        "* scaling down the latitude and longitude\n",
-        "  - KNN imputation takes more time due to the larger numbers\n",
-        "  - standardizing gives better results on most algorithms\n",
-        "    - this is a competition, we came to win"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "kitrNxKgLWUd",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "df_train['latitude'] = df_train.latitude / 100000\n",
-        "df_train['longitude'] = df_train.longitude / 100000"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "y6bhRhu5YZ1d",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #7 numberofstories & unitcnt & roomcnt\n",
-        "* we can derive the unit count from the property land use type (`propertylandusetypeid`)\n",
-        "  - so we can now go ahead and correct the unit counts for each given property"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "yHZH4rMNLfBA",
-        "colab_type": "code",
-        "outputId": "97106bb4-10f2-49a9-f821-03a3972db136",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 208
-        }
-      },
-      "source": [
-        "# where room count is 0, go ahead and NaN it\n",
-        "df_train.roomcnt.loc[df_train['roomcnt'] == 0] = np.nan\n",
-        "\n",
-        "\"\"\"\n",
-        "propertylandusetypeid & unitcnt are related \n",
-        "  these are the propertylandusetypeid codes & their definitions\n",
-        "  \n",
-        "#246 -Duplex (2 Units, Any Combination)\n",
-        "#247 -Triplex (3 Units, Any Combination)\n",
-        "#248 -Quadruplex (4 Units, Any Combination)\n",
-        "#260 -Residential General\n",
-        "#261 -Single Family Residential\n",
-        "#263 -Mobile Home\n",
-        "#264 -Townhouse\n",
-        "#266 -Condominium\n",
-        "#267 -Cooperative\n",
-        "#269 -Planned Unit Development\n",
-        "#275 -Residential Common Area \n",
-        "#31 - Commercial/Office/Residential Mixed Used\n",
-        "#47 -Store/Office (Mixed Use)\n",
-        "#265 -Cluster Home\n",
-        "\"\"\"\n",
-        "\n",
-        "# one unit \n",
-        "ones = [260,261,263,264,266,267,269,275]\n",
-        "for one in ones:\n",
-        "  # adjust conditions to one unit indicator\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == one) \n",
-        "                & (df_train['unitcnt'].isna()))\n",
-        "  df_train.unitcnt.loc[conditions] = 1\n",
-        "\n",
-        "# two units \n",
-        "twos = [31,47,246]\n",
-        "for two in twos:\n",
-        "  # adjust conditions to two unit indicator\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == two) \n",
-        "                & (df_train['unitcnt'].isna()))\n",
-        "  df_train.unitcnt.loc[conditions] = 2\n",
-        "\n",
-        "# three units\n",
-        "conditions = ((df_train['propertylandusetypeid'] == 247) \n",
-        "              & (df_train['unitcnt'].isna()))\n",
-        "df_train.unitcnt.loc[conditions] = 3\n",
-        "\n",
-        "# four units\n",
-        "conditions = ((df_train['propertylandusetypeid'] == 248) \n",
-        "              & (df_train['unitcnt'].isna()))\n",
-        "df_train.unitcnt.loc[conditions] = 4\n",
-        "\n",
-        "# let's see how out unit counts look\n",
-        "print(df_train.unitcnt.value_counts())"
-      ],
-      "execution_count": 170,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "1.0      86035\n",
-            "2.0       2372\n",
-            "4.0        884\n",
-            "3.0        622\n",
-            "5.0          1\n",
-            "6.0          1\n",
-            "9.0          1\n",
-            "11.0         1\n",
-            "70.0         1\n",
-            "143.0        1\n",
-            "Name: unitcnt, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "02yLicmxLs3C",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #8 Time to Cut\n",
-        "**Because of the adjustments made so far a number of columns are no longer needed**\n",
-        "*  transaction date column is no longer of use\n",
-        "  - and can be dropped \n",
-        "* `preimeter_living_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
-        "  - except that `preimeter_living_area_sqft` has more duplicates\n",
-        "* `total_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
-        "  - except that \"total_area_sqft\" has more duplicates\n",
-        "* `total_finished_living_area_sqft` and `finished_living_area_sqft` have the same values \n",
-        "  - except that `finished_living_area_sqft` has more duplicates\n",
-        "* `base_unfinished_and_finished_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
-        "  - except that `base_unfinished_and_finished_area_sqft` has more duplicates\n",
-        "* different counties follow different land use code\n",
-        "  - to compare different counties, Zillow has created its own `propertylandusetypeid`\n",
-        "    - hence we can drop `propertycountylandusecode`\n",
-        "    - the same applies to `propertyzoningdesc`\n",
-        "* Most zip IDs are either invalid or out of city\n",
-        "  - since enough information about location is given in latitude and longitude \n",
-        "    - let's drop other location related fields\n",
-        "      - `regionidcity`\n",
-        "      - `regionidzip`\n",
-        "      - `regionidneighborhood`\n",
-        "* `assessmentyear` has a constant value for all rows\n",
-        "  - let's drop it"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "OtOgzOqHLyid",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# collect columns to drop\n",
-        "cut = ['propertyzoningdesc','propertycountylandusecode',\n",
-        "       'base_unfinished_and_finished_area_sqft','finished_living_area_sqft',\n",
-        "       'total_area_sqft','preimeter_living_area_sqft','regionidzip',\n",
-        "       'regionidcity','regionidneighborhood','assessmentyear','transactiondate',\n",
-        "       'censustractandblock']\n",
-        "# cut columns form dataframe\n",
-        "df_train = df_train.drop(cut, axis=1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "icDvpvSD6BSb",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #9 Tax, Year, & Census\n",
-        "-  if tax delinquency flag is null, assume there is no unpaid tax on the property\n",
-        "  - an issue arises here because `taxdelinquencyflag` is a `StringColumn`\n",
-        "    - i.e. null values indicate no tax delinquency, all other values are `Y` for yes\n",
-        "    - because of this, the normal method of.."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "8lYcO_T5XKNN",
-        "colab_type": "code",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 311
-        },
-        "outputId": "596cfad3-890d-4241-b8b8-347673082a7f"
-      },
-      "source": [
-        "# how we'd normally take care of this\n",
-        "df_train['taxdelinquencyflag'].fillna(0)"
-      ],
-      "execution_count": 172,
-      "outputs": [
-        {
-          "output_type": "error",
-          "ename": "TypeError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-172-f9b8b7d87fff>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1165\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1166\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1167\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1169\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/column/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    720\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         ):\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "tA6xG6h59rLi",
-        "colab_type": "text"
-      },
-      "source": [
-        "- ...comes with error. \n",
-        "  - Why?\n",
-        "    - the series we are trying to fill the null values of is a string series\n",
-        "      - because of this `.fillna()` requires a string value (e.g. '0') instead of an int value (e.g. 0)\n",
-        "  - So, what now?\n",
-        "    - there is an easy and straightforward solution with masked assigning!! \n",
-        "      - First\n",
-        "        - switch 1 (current True, actual False) to -1\n",
-        "      - Then\n",
-        "        - switch 0 (current False, actual True) to 1 to reflect True status\n",
-        "      - Finally\n",
-        "        - switch -1 (old True, actual False) to 0 to reflect False status"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Svp6J0cJ5dL0",
-        "colab_type": "code",
-        "outputId": "03862711-e104-4954-bf9c-61bd51b3a9e3",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 69
-        }
-      },
-      "source": [
-        "# if bool 'Y'/None is already set, change string to int bool column via .isna()\n",
-        "df_train['taxdelinquencyflag'] = df_train['taxdelinquencyflag'].isna()\n",
-        "\n",
-        "# next we must correct the values, with 1 (True) for 'Y' and 0 for no\n",
-        "switcharoo = [(1,-1),(0,1),(-1,0)]\n",
-        "# switch values in order\n",
-        "for pair in switcharoo:\n",
-        "  # tag old value and new value it will be replaced with\n",
-        "  old, new = pair\n",
-        "  # replace old value with new value\n",
-        "  df_train['taxdelinquencyflag'] = df_train['taxdelinquencyflag'].replace(old, \n",
-        "                                                                          new)\n",
-        "# display values in tax delinquency flag column\n",
-        "print(df_train['taxdelinquencyflag'].value_counts())"
-      ],
-      "execution_count": 173,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0    88492\n",
-            "1     1783\n",
-            "Name: taxdelinquencyflag, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "w5EAdWXaCTRU",
-        "colab_type": "text"
-      },
-      "source": [
-        "- Convert years\n",
-        "  - from yy\n",
-        "    - to 2016 - yyyy \n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "lHh95mAIMrMy",
-        "colab_type": "code",
-        "outputId": "832c405d-d89f-4b85-d77d-7a6726a61907",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 69
-        }
-      },
-      "source": [
-        "print(df_train.taxdelinquencyflag.value_counts())"
-      ],
-      "execution_count": 174,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0    88492\n",
-            "1     1783\n",
-            "Name: taxdelinquencyflag, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "6Bic66I9LfGC",
-        "colab_type": "code",
-        "outputId": "baaa5387-bbd7-4242-a336-0b6b90606935",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 243
-        }
-      },
-      "source": [
-        "# no delinquency? set year to 0\n",
-        "df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyflag == 0] = 0\n",
-        "# collect x and xx formatted delinquency years w/ matching xxxx year format pair\n",
-        "year_pairs = [(99,1999), (6,2006), (7,2007), (8,2008), (9,2009), (10,2010),\n",
-        "             (11,2011), (12,2012), (13,2013), (14,2014), (15,2015)]\n",
-        "# go through the pairs individually \n",
-        "for year in year_pairs:\n",
-        "  # split the pair in question \n",
-        "  old, new = year\n",
-        "  # replace old year (e.g. 99) with new year (e.g. 1999)\n",
-        "  df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear == old] = new\n",
-        "\n",
-        "# adjust delinquency year relative to training year (2016) \n",
-        "df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear>0] = 2016 - df_train.taxdelinquencyyear.loc[df_train.taxdelinquencyyear>0]\n",
-        "# what've we got? \n",
-        "print(df_train.taxdelinquencyyear.value_counts())"
-      ],
-      "execution_count": 175,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0.0     88492\n",
-            "2.0       628\n",
-            "1.0       518\n",
-            "3.0       210\n",
-            "4.0       154\n",
-            "6.0        89\n",
-            "5.0        85\n",
-            "7.0        63\n",
-            "8.0        24\n",
-            "9.0         8\n",
-            "10.0        3\n",
-            "17.0        1\n",
-            "Name: taxdelinquencyyear, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ya7xLHzdGVcs",
-        "colab_type": "text"
-      },
-      "source": [
-        "- values in `rawcensustractandblock` represent multiple fields concatenated together as float values\n",
-        "  - by converting those values to string we can split each and build new columns:\n",
-        "    - `census_tractnumber`\n",
-        "    - `block_number`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "AWx7lq0xkDV2",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# make a copy of dataframe at this point\n",
-        "# pre_string = df_train.copy()\n",
-        "df_train = pre_string.copy()"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Sg0eN-K1QdZy",
-        "colab_type": "code",
-        "outputId": "a90de47f-5c88-4834-df44-75a9dedcd07c",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 489
-        }
-      },
-      "source": [
-        "# rawcensustractandblock is stored as a float; take a string copy so the\n",
-        "# tract and block digits can be sliced out by character position\n",
-        "string_data = cudf.Series(df_train['rawcensustractandblock'].values_to_string())\n",
-        "\n",
-        "# KNOWN ISSUE (current error in conversion of values): the float -> string\n",
-        "# conversion appends stray digits to some values; the message below prints\n",
-        "# the float and string forms one after the other\n",
-        "print(f\"\\nNOTE: THERE APPEARS TO BE AN ERROR WHEN CONVERTING TO STRING\\n\"\n",
-        "      f\"  > somewhat random numbers added to end of some values\\n    >> e.g. 004, 006\"\n",
-        "      f\"\\n\\n\\ndf_train['rawcensustractandblock'].head(10).values\\n\"\n",
-        "      f\"{df_train['rawcensustractandblock'].head(10).values}\\n\\n\"\n",
-        "      f\"data.head(10).values\\n{string_data.head(10).values}\\n\\n\\n\"\n",
-        "      f\"THE SAME NUMBERS OCCOUR IN THE FIRST WHEN PUT INTO A LIST\\n\"\n",
-        "      f\"  > not sure how to deal with this now\\n\"\n",
-        "      f\"    >> difficult to reproduce without data\\n\\n\")\n",
-        "\n",
-        "# tract number: characters 4 through 10 of the string form\n",
-        "df_train['census_tractnumber'] = string_data.str.slice(4, 11)\n",
-        "\n",
-        "# block number: everything from character 11 onwards ...\n",
-        "df_train['block_number'] = string_data.str.slice(11)\n",
-        "# ... with a '.' placed after its first four digits ...\n",
-        "df_train['block_number'] = df_train.block_number.str.slice(0, 4).str.cat(\n",
-        "    df_train.block_number.str.slice(4), '.')\n",
-        "# ... rounded to a whole number ...\n",
-        "df_train['block_number'] = df_train.block_number.astype('float').round(0).astype('int')\n",
-        "# ... and stored as a string padded with '0' to a width of four\n",
-        "df_train['block_number'] = df_train.block_number.astype('str').str.ljust(4, '0')\n",
-        "\n",
-        "# the raw combined column is no longer needed\n",
-        "df_train = df_train.drop('rawcensustractandblock', axis=1)\n",
-        "\n",
-        "# Values the original author expected the print below to show (not\n",
-        "# currently seen because of the conversion issue noted above):\n",
-        "#\n",
-        "#   tractnumber        blocknumber\n",
-        "#   0    1066.46       0    1001\n",
-        "#   1    0524.22       1    2024\n",
-        "#   2    4638.00       2    3004\n",
-        "#   3    2963.00       3    2002\n",
-        "#   4    0423.38       4    1006\n",
-        "#   dtype: object      dtype: object\n",
-        "print(df_train[['census_tractnumber', 'block_number']].head())"
-      ],
-      "execution_count": 177,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "\n",
-            "NOTE: THERE APPEARS TO BE AN ERROR WHEN CONVERTING TO STRING\n",
-            "  > somewhat random numbers added to end of some values\n",
-            "    >> e.g. 004, 006\n",
-            "\n",
-            "\n",
-            "df_train['rawcensustractandblock'].head(10).values\n",
-            "[60375315.031013   60374625.001017   60590114.012017   60376513.02100401\n",
-            " 60374087.031018   60375759.011001   60590630.044      60374061.011006\n",
-            " 60378001.022007   60590524.19100901]\n",
-            "\n",
-            "data.head(10).values\n",
-            "['60375315.031013004', '60374625.001017004', '60590114.012017', '60376513.021004006', '60374087.031018004', '60375759.011001', '60590630.044', '60374061.011006', '60378001.022007', '60590524.19100901']\n",
-            "\n",
-            "\n",
-            "THE SAME NUMBERS OCCOUR IN THE FIRST WHEN PUT INTO A LIST\n",
-            "  > not sure how to deal with this now\n",
-            "    >> difficult to reproduce without data\n",
-            "\n",
-            "\n",
-            "  census_tractnumber block_number\n",
-            "0            5315.03         1013\n",
-            "1            4625.00         1017\n",
-            "2            0114.01         2017\n",
-            "3            6513.02         1004\n",
-            "4            4087.03         1018\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "T71orw51lpTN",
-        "colab_type": "text"
-      },
-      "source": [
-        "## Dealing with Missing Values\n",
-        "### #1 Setting standards\n",
-        "- Despite correcting and adjusting the data up to this point, some columns still hold a large majority of null values\n",
-        "- For some columns, this majority represents over 95% of values\n",
-        "  - Let's identify those columns\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "xhCosNpXvTVU",
-        "colab_type": "code",
-        "outputId": "2d969756-decb-4912-94f6-19836eb0323a",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 86
-        }
-      },
-      "source": [
-        "# fraction of null values per column, laid out as a two-column frame\n",
-        "missingvalues_prop = (df_train.isnull().sum() / len(df_train)).reset_index()\n",
-        "missingvalues_prop.columns = ['field', 'percentage']\n",
-        "\n",
-        "# order the columns from most null to least null\n",
-        "missingvalues_prop = missingvalues_prop.sort_values(by='percentage', ascending=False)\n",
-        "\n",
-        "# keep only the columns where more than 95% of the values are null\n",
-        "missingvaluescols = missingvalues_prop.loc[missingvalues_prop['percentage'] > 0.95]\n",
-        "\n",
-        "# show which columns those are\n",
-        "print(missingvaluescols)\n",
-        "\n",
-        "# remove those mostly-empty columns from the training frame\n",
-        "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
-      ],
-      "execution_count": 178,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "                       field  percentage\n",
-            "7        buildingclasstypeid    0.999823\n",
-            "3   architecturalstyletypeid    0.997109\n",
-            "33    typeconstructiontypeid    0.996688\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "8eBIDWEUBHwz",
-        "colab_type": "text"
-      },
-      "source": [
-        "- and drop columns with more than 95% null values"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "az6t2ntBCMRe",
-        "colab_type": "text"
-      },
-      "source": [
-        "### #2 Working with Remaining Values\n",
-        "- the majority of values still missing in `unitcnt` are in rows where `propertylandusetypeid` = 265, \n",
-        "  - which is Cluster Home (i.e. a group of houses with shared walls)\n",
-        "    - each cluster is anywhere from 5 to 25 units\n",
-        "      - here we will assume 10 units as a reasonable count"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "yB2lzAyopS_S",
-        "colab_type": "code",
-        "outputId": "db6c7add-5452-4535-8948-a426654851b7",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 225
-        }
-      },
-      "source": [
-        "# propertylandusetypeid 265 is Cluster Home: assume 10 units for those rows\n",
-        "df_train.unitcnt.loc[df_train.propertylandusetypeid == 265] = 10\n",
-        "# resulting distribution of unit counts\n",
-        "print(df_train.unitcnt.value_counts())"
-      ],
-      "execution_count": 179,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "1.0      86035\n",
-            "2.0       2372\n",
-            "4.0        884\n",
-            "3.0        622\n",
-            "10.0       356\n",
-            "5.0          1\n",
-            "6.0          1\n",
-            "9.0          1\n",
-            "11.0         1\n",
-            "70.0         1\n",
-            "143.0        1\n",
-            "Name: unitcnt, dtype: int32\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "iR1rBlz-dOdH",
-        "colab_type": "text"
-      },
-      "source": [
-        "- a number of pool sizes are null despite there being a pool\n",
-        "  - let's calculate the average pool size\n",
-        "    - and assume those null values are pools of average size"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "-icFDeLSoJwl",
-        "colab_type": "code",
-        "outputId": "b1ed39c3-3a14-4dc1-eb48-b3429da5cffe",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "# mean pool square footage across properties that have at least one pool\n",
-        "poolsizesum_mean = df_train['pool_sqft'].loc[df_train.pool_count > 0].mean()\n",
-        "\n",
-        "# rows that have a pool but whose pool square footage is null\n",
-        "conditions = (df_train.pool_count > 0) & df_train.pool_sqft.isna()\n",
-        "\n",
-        "# fill those gaps with the mean pool size computed above\n",
-        "df_train.pool_sqft.loc[conditions] = poolsizesum_mean\n",
-        "\n",
-        "# number of pool_sqft values that are still null\n",
-        "print(df_train.pool_sqft.isna().sum())"
-      ],
-      "execution_count": 180,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "0\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "AyGeXJfEmJBU",
-        "colab_type": "text"
-      },
-      "source": [
-        "- total parcel tax\n",
-        "- structure tax\n",
-        "- land tax"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "3pVABkZTYK9F",
-        "colab_type": "code",
-        "outputId": "b5cb7ced-7458-4971-936c-b6e5d33bc126",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 173
-        }
-      },
-      "source": [
-        "# tax columns whose null counts are reported before and after the fix-up\n",
-        "tax_cols = ['total_property_tax_2016', 'structure_tax',\n",
-        "            'total_parcel_tax', 'land_tax']\n",
-        "\n",
-        "# null counts before\n",
-        "for tax_col in tax_cols:\n",
-        "  print(df_train[tax_col].isnull().sum())\n",
-        "print()\n",
-        "\n",
-        "# total_parcel_tax = structure_tax + land_tax\n",
-        "#   -> structure_tax = total_parcel_tax - land_tax\n",
-        "\n",
-        "# rows where both the parcel tax and the land tax are greater than 0\n",
-        "has_taxes = (df_train.total_parcel_tax > 0) & (df_train.land_tax > 0)\n",
-        "parcel_taxes = df_train.total_parcel_tax.loc[has_taxes]\n",
-        "land_taxes = df_train.land_tax.loc[has_taxes]\n",
-        "# on those rows, set structure tax to parcel tax minus land tax\n",
-        "df_train['structure_tax'].loc[has_taxes] = parcel_taxes - land_taxes\n",
-        "\n",
-        "# a structure tax that is still 0 is treated as missing\n",
-        "df_train.structure_tax.loc[df_train.structure_tax == 0] = np.nan\n",
-        "\n",
-        "# null counts after\n",
-        "for tax_col in tax_cols:\n",
-        "  print(df_train[tax_col].isnull().sum())"
-      ],
-      "execution_count": 181,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "6\n",
-            "380\n",
-            "1\n",
-            "1\n",
-            "\n",
-            "6\n",
-            "380\n",
-            "1\n",
-            "1\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "8SID48LOpYvu",
-        "colab_type": "code",
-        "outputId": "6d20a3ba-4360-4554-908d-f6d673aece12",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "# regionidcounty is an exact copy of the fips code, so drop the duplicate column\n",
-        "df_train = df_train.drop('regionidcounty', axis=1)\n",
-        "# (rows, columns) after the drop\n",
-        "df_train.shape"
-      ],
-      "execution_count": 182,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "(90275, 45)"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          },
-          "execution_count": 182
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "tWmM2J8_pkg1",
-        "colab_type": "code",
-        "outputId": "6362e07f-e363-4884-b0c5-9380b5fee956",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 34
-        }
-      },
-      "source": [
-        "# bedroomcnt: 1421 rows report zero bedrooms; the original author observed\n",
-        "# that the other room counts are missing on those rows as well, so null is\n",
-        "# a better representation than 0\n",
-        "df_train.bedroomcnt.loc[df_train.bedroomcnt == 0] = np.nan\n",
-        "# number of null bedroom counts after the replacement\n",
-        "print(df_train.bedroomcnt.isnull().sum())"
-      ],
-      "execution_count": 183,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "1421\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "3qnP2L9LpmeJ",
-        "colab_type": "code",
-        "outputId": "c0eabce4-3232-4435-8733-779526f18c57",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 208
-        }
-      },
-      "source": [
-        "# null counts noted by the original author for the room-related columns:\n",
-        "#   total_bath  1165\n",
-        "#   full_bath   1182\n",
-        "#   half_bath   1182\n",
-        "#   bedroomcnt  1421\n",
-        "#   roomcnt     1416\n",
-        "room_cols = ['total_bath', 'full_bath', 'half_bath', 'bedroomcnt', 'roomcnt']\n",
-        "\n",
-        "# null counts before\n",
-        "for room_col in room_cols:\n",
-        "  print(df_train[room_col].isnull().sum())\n",
-        "print()\n",
-        "\n",
-        "# relationships between the columns:\n",
-        "#   roomcnt    = (full_bath + half_bath) + bedroomcnt\n",
-        "#   total_bath = full_bath + 0.5 * half_bath\n",
-        "# (author's note: recalculating from the bath columns fixes about 500\n",
-        "#  missing values in roomcnt)\n",
-        "\n",
-        "# rows where roomcnt is null but both bath counts and the bedroom count are known\n",
-        "conditions = ((df_train.roomcnt.isna() == True)\n",
-        "              & (df_train.full_bath.isna() == False)\n",
-        "              & (df_train.half_bath.isna() == False)\n",
-        "              & (df_train.bedroomcnt.isna() == False))\n",
-        "# rebuild roomcnt on those rows from full baths, half baths and bedrooms\n",
-        "new_values = (df_train.full_bath.loc[conditions]\n",
-        "              + df_train.half_bath.loc[conditions]\n",
-        "              + df_train.bedroomcnt.loc[conditions])\n",
-        "df_train.roomcnt.loc[conditions] = new_values\n",
-        "\n",
-        "# (author's note: most bedroomcnt and roomcnt nulls fall on the same rows;\n",
-        "#  on 1133 rows all of these columns are null)\n",
-        "\n",
-        "# null counts after\n",
-        "for room_col in room_cols:\n",
-        "  print(df_train[room_col].isnull().sum())"
-      ],
-      "execution_count": 184,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "1165\n",
-            "1182\n",
-            "1182\n",
-            "1421\n",
-            "69700\n",
-            "\n",
-            "1165\n",
-            "1182\n",
-            "1182\n",
-            "1421\n",
-            "1416\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Mvy51Ckev9CX",
-        "colab_type": "text"
-      },
-      "source": [
-        "- correct number of stories by Zillow's `propertylandusetypeid` indicator\n",
-        "  - where values are not null\n",
-        "    - number of stories can be set to the mode\n",
-        "  - where there are null values\n",
-        "    - number of stories can be set to the generally accepted number of stories"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "IW4CG2InpolD",
-        "colab_type": "code",
-        "outputId": "02375307-54e2-432b-8b87-1397c73d56b2",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 260
-        }
-      },
-      "source": [
-        "# distribution and null count before the clean-up\n",
-        "print(f'BEFORE\\n{df_train.numberofstories.value_counts()}\\n'\n",
-        "      f'{df_train.numberofstories.isnull().sum()} remaining null values\\n')\n",
-        "\n",
-        "# (propertylandusetypeid, general number of stories) pairs\n",
-        "zillow_type_ids = [(31,2), (246,2), (247,2), (248,2), (260,2), (261,1),\n",
-        "                   (263,1), (266,1), (267,1), (269, 2), (275,1)]\n",
-        "\n",
-        "# handle one property type at a time\n",
-        "for t_id, n_stories in zillow_type_ids:\n",
-        "  # rows of this property type that already have a story count\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == t_id)\n",
-        "                & (df_train['numberofstories'].isna() == False))\n",
-        "\n",
-        "  # occurrences of each story count for this type; the result is small, so\n",
-        "  # move it to pandas where the most frequent value can be read off directly\n",
-        "  story_counts = df_train.numberofstories.loc[conditions].value_counts().to_pandas()\n",
-        "  # the mode is the index label with the highest count, not the count itself;\n",
-        "  # fall back to the general value when this type has no recorded story counts\n",
-        "  mode_stories = story_counts.idxmax() if len(story_counts) > 0 else n_stories\n",
-        "\n",
-        "  # set the non-null values to the most common value seen for this type\n",
-        "  # NOTE(review): this replaces every recorded story count with the mode,\n",
-        "  # as described in the markdown above - confirm that is intended\n",
-        "  df_train['numberofstories'].loc[conditions] = mode_stories\n",
-        "\n",
-        "  # rows of this property type whose story count is null\n",
-        "  conditions = ((df_train['propertylandusetypeid'] == t_id)\n",
-        "                & (df_train['numberofstories'].isna() == True))\n",
-        "  # fill the nulls with the general number of stories for this type\n",
-        "  df_train['numberofstories'].loc[conditions] = n_stories\n",
-        "\n",
-        "# edge case: type 264 gets 2 stories wherever the count is null\n",
-        "conditions = ((df_train.propertylandusetypeid==264)\n",
-        "              & (df_train.numberofstories.isnull()))\n",
-        "df_train.numberofstories.loc[conditions] = 2\n",
-        "\n",
-        "# distribution and null count after the clean-up\n",
-        "print(f'AFTER\\n{df_train.numberofstories.value_counts()}\\n'\n",
-        "      f'{df_train.numberofstories.isnull().sum()} remaining null values')"
-      ],
-      "execution_count": 185,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "BEFORE\n",
-            "1.0    12016\n",
-            "2.0     8044\n",
-            "3.0      508\n",
-            "4.0        2\n",
-            "Name: numberofstories, dtype: int32\n",
-            "69705 remaining null values\n",
-            "\n",
-            "AFTER\n",
-            "1.0    20154\n",
-            "2.0      423\n",
-            "3.0        4\n",
-            "Name: numberofstories, dtype: int32\n",
-            "69694 remaining null values\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "AHcMsDCxprd4",
-        "colab_type": "code",
-        "outputId": "30481b2c-e035-4478-d62f-63e10a09c17e",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 295
-        }
-      },
-      "source": [
-        "# distribution and null count before the clean-up\n",
-        "print(f'BEFORE\\n{df_train.fireplace_count.value_counts()}\\n'\n",
-        "      f'{df_train.fireplace_count.isnull().sum()} remaining null values\\n')\n",
-        "\n",
-        "# rows flagged as having a fireplace that also have a recorded count\n",
-        "conditions = ((df_train.fireplaceflag==1)\n",
-        "              & (df_train.fireplace_count.isna() == False))\n",
-        "# occurrences of each fireplace count on those rows; the result is small, so\n",
-        "# move it to pandas where the most frequent value can be read off directly\n",
-        "fire_counts = df_train.loc[conditions, 'fireplace_count'].value_counts().to_pandas()\n",
-        "# the mode is the index label with the highest count, not the count itself\n",
-        "# (taking element [0] of value_counts() wrote the count, 8165, into the column);\n",
-        "# default to 1 when no counts are recorded\n",
-        "mode_fire_count = fire_counts.idxmax() if len(fire_counts) > 0 else 1\n",
-        "# set the non-null values to the most common fireplace count\n",
-        "# NOTE(review): this replaces every recorded count on flagged rows with the\n",
-        "# mode - confirm that is intended\n",
-        "df_train['fireplace_count'].loc[conditions] = mode_fire_count\n",
-        "\n",
-        "# rows flagged as having a fireplace whose count is null\n",
-        "conditions = ((df_train.fireplaceflag==1)\n",
-        "              & (df_train.fireplace_count.isna() == True))\n",
-        "# fill the null counts with 1\n",
-        "df_train.fireplace_count.loc[conditions] = 1\n",
-        "\n",
-        "# distribution and null count after the clean-up\n",
-        "print(f'AFTER\\n{df_train.fireplace_count.value_counts()}\\n'\n",
-        "      f'{df_train.fireplace_count.isnull().sum()} remaining null values')"
-      ],
-      "execution_count": 186,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "BEFORE\n",
-            "0.0    80446\n",
-            "1.0     8165\n",
-            "2.0     1106\n",
-            "3.0      312\n",
-            "4.0       21\n",
-            "5.0        3\n",
-            "Name: fireplace_count, dtype: int32\n",
-            "222 remaining null values\n",
-            "\n",
-            "AFTER\n",
-            "0.0       80446\n",
-            "8165.0     9607\n",
-            "1.0         222\n",
-            "Name: fireplace_count, dtype: int32\n",
-            "0 remaining null values\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "FIuSWoJspt3H",
-        "colab_type": "code",
-        "outputId": "cb11c3a1-1658-4bce-cbde-a1a47ccdc0a8",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 317
-        }
-      },
-      "source": [
-        "# basic seaborn setup\n",
-        "color = sns.color_palette()\n",
-        "sns.set(style=\"darkgrid\")\n",
-        "# convert the dataframe to pandas for ease of use with seaborn\n",
-        "pd_train = df_train.to_pandas()\n",
-        "# count of properties for each buildingqualitytypeid value\n",
-        "ax = sns.countplot(x=\"buildingqualitytypeid\", data=pd_train)\n",
-        "# vertical tick labels, and a title that names the column actually plotted\n",
-        "plt.xticks(rotation='vertical')\n",
-        "plt.title(\"Frequency of Building Quality Type\", fontsize=15)\n",
-        "# display the graph\n",
-        "plt.show()"
-      ],
-      "execution_count": 187,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAEsCAYAAACFRGf6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XlYVGX/P/D3DAiCghMIBFQuGEii\nYhBgYiaKIKGoaeKammtuZe4S9OAWiGEuqaVfzUxNS0UWBZdv+eRjmpUVov6U3NmUAXEDYeb8/uDL\neRwBHVC4B3y/rovrYs59Zs7n3LO859znzDkKSZIkEBER1TKl6AKIiOjZxAAiIiIhGEBERCQEA4iI\niIRgABERkRAMICIiEoIB9AxasWIFXFxcyv2NGDFCdGn1WkpKCgIDA+Hm5gZ/f/8K57l06ZLOc+Lq\n6oo333wT4eHhyMvLq/IyExMTsXv37nLTBw0ahA8//LDKj0dVc/LkSaxcuVJ0GQbLWHQBJIaFhQXW\nrVtXbhrVjJKSEsyaNQt+fn5YsGABGjVq9Mj558yZA3d3d2g0GqSnpyM2NhaZmZn46quvqrTcxMRE\n3L17F3369HmS8qmaTp48iS+++AKTJk0SXYpBYgA9o4yMjODu7q73/IWFhWjYsGENVlS/ZWVl4e7d\nu+jduzc8PT0fO3/Lli3l58fDwwOFhYVYvHix8OdB9PKpfuEQHJVTUlICFxcXfP3111iwYAF8fHx0\nvkGnpKSgX79+aNu2LXx9fRETE4OSkhKdx0hKSkKPHj3Qrl07DBs2DH/99RdcXFwQFxens4ytW7fq\n3C82NhadOnXSmXbt2jV88MEHeO2119C+fXuMHj0aFy9elNvLhq2Sk5MRFhYGDw8PvPHGG1i5ciUe\nPtHHmTNnMHbsWHh4eKBDhw545513cPToURQXF+P111/HF198Ua4/Bg0ahClTpjyyzxITExEcHAw3\nNze8+eab+Pzzz6HRaAAAO3bsQLdu3QAAY8eOhYuLS4XLeZRGjRpBq9VCq9XK03744QeEhobCy8sL\nXl5eePfdd3Hq1Cm5ffr06Th48CCOHj0qD+k9vNzdu3eje/fuePXVVzFmzBhkZ2fLbWX9mpiYiOnT\np8PT0xMTJ04EAGg0GixbtgxdunSBm5sbgoODkZiYWKV+KesbFxcXnD59GkOGDEH79u3Rt29fnD59\nGnfu3MGsWbPw6quvonv37khKSnpsP2k0GqxevRo9evSAm5sb3njjDcydO1dnnk2bNsHf3x9ubm7o\n0aMHNm3apNM+ffp0vPPOOzrTyvri8OHDAP77+t28eTNiYmLg7e2Njh07Yv78+bh//768bosXL4ZG\no+EwdyW4BfQMezg0jIyMoFAo5NtfffUVvL29ER0dLX+Qx8fHY+bMmRg0aBCmTZuGS5cuYenSpQBK\n37gA8Ndff+Gjjz5CQEAAwsLCcObMGXzwwQfVqlGtVmPQoEFo2rQpIiMjYWpqirVr12LUqFHYt28f\nTExM5HmjoqIQEBCA5cuX4+eff8aKFSvg7OyMHj16AADOnTuHQYMGwcnJCZGRkWjSpAlSU1ORmZmJ\nBg0aICQkBLt378b7778vP+bFixfx+++/48svv6y0xp9++gnTpk1Dv379MHPmTJw5cwbLly/HzZs3\nER4ejm7dusHCwgJTp06Vh9bs7e0fud6SJKGkpARarRbnz5/Hhg0b0KlTJ5ibm8vzZGRkoF+/fnjx\nxRdx//597NmzB0OGDEFiYiIcHR0xZcoUZGVlobCwEGFhYQCgs9zff/8dWVlZmDNnDu7evYtFixYh\nIiICa9as0all8eLFcr8qlaXfWT/77DN8/fXXmDRpEtq0aYO9e/di2rRpUCqV6Nmzp1798qCZM2di\n6NChGDt2LGJiYjB16lS0bt0
aLVu2xIoVK7B9+3bMnDkTnp6esLW1rbTf5s2bh4SEBIwZMwaenp7I\nz8/HgQMH5PYtW7Zg0aJFGDlyJDp16oSjR49i0aJFKC4uxnvvvffI56Qi69atw+uvv46YmBicPn0a\nsbGxeOGFFzBy5Eh069YN586dw+bNm7FlyxYAHOYuR6JnzvLlyyVnZ+dyf0eOHJEkSZKKi4slZ2dn\nqV+/fjr302g0UufOnaV58+bpTN+2bZvUrl07KT8/X5IkSZo4caIUHBwsabVaeZ4VK1ZIzs7O0u7d\nu3WWsWXLFp3H+uyzz6TXX39dvh0TEyN5e3tLN2/elKep1WrJ3d1d2rp1qyRJknTx4kXJ2dlZmj17\nts5jvfXWW9JHH30k3548ebL05ptvSoWFhRX2y/nz5yVnZ2fp119/lactXbpU8vX1lUpKSiq8jyRJ\nUt++faURI0boTFu9erXk6uoqZWdn69T4008/Vfo4D8738F9wcLCUlZVV6f00Go1UXFwsde/eXVq9\nerU8fcKECdK7775bbv7Q0FDJ09NTKigokKetW7dOcnFxkYqKinRqmTx5ss59c3NzpbZt20pffPGF\nzvSRI0dKQUFBVeqX7du3S87OzlJcXJw8z4EDByRnZ2cpLCxMnpafny+1bt1a+u677yrtg7Nnz0rO\nzs7S5s2bK2wvLi6WXn/99XKv37CwMMnT01Ne748++kgaMGCAzjwPP39lr99hw4bpzDd27FgpNDRU\nvr1hwwbJ1dW10pqfdRyCe0ZZWFjg+++/1/lr166dzjxvvvmmzu309HRkZ2ejZ8+eKCkpkf98fHxQ\nWFiI8+fPAyjdAvLz89PZmqrsqK/H+c9//gNfX1+Ym5vLy7OwsECbNm2QmpqqM6+vr6/ObScnJ50h\npWPHjiEoKAimpqYVLsvJyQkdOnTAzp07AQBarRZxcXHo06cPjIyMKrxPcXExzpw5g8DAQJ3pQUFB\n0Gg0+PPPP6u8zgAQFhaG77//Hjt27MDKlSvRsGFDjB07Fvfu3ZPnOXfuHN5//328/vrrcHV1RZs2\nbXD58mWd4clHadeunc438latWkGSJOTk5OjM9/Dr4OzZsygqKiq3zj179sT58+eRn59f5X7p2LGj\n/H+zZs0AAD4+PvK0Jk2aQKVS6TyfDzt27BgAoF+/fhW2Z2Zm4saNGxXWVFBQIL9+q+Jxrzl6NA7B\nPaOMjIzQtm3bR85jbW2tc7vsMOBRo0ZVOH9WVhYAIDc3t9x9H76tr7y8PKSmpiI+Pr5c28NB8vDw\nRoMGDVBUVASgdEgrPz8fNjY2j1xe//79sWjRInz88cc4ceIEsrKyKv1AA0qHCDUaTaXrm5+f/8jl\nVaZZs2Y6z0+HDh3g6+uL3bt3Y9CgQbh16xZGjRoFOzs7zJkzB/b29jA1NcXcuXPldX4cS0tLndsN\nGjQAgHL3f3jdrl+/DgBo2rSpzvSy2wUFBSgqKqpSvzxYS1kdj3o+K5Kfnw8LCwuYmZlV2F4WrA/X\nXVbTzZs3K33sylS1RtLFAKJKPbgFA5R+CwWARYsWwdnZudz8L774IoDSN3Rubq5O28O3jYyMYGxs\njOLiYp3pBQUFOrdVKhVeeeUVjBs3rtzyGjdurOealK6LSqWSPzwrExQUhEWLFiE5ORmHDx/Gq6++\nihYtWlQ6v5WVFYyMjKBWq3Wml62vSqXSu8ZHadq0KZo0aYL09HQApftvcnJysHnzZnmLASjff0/D\nw6+DshDPzc3V+QC+ceMGgNIwadSoUa30y4NUKhVu3bqFe/fuVRhCZfuOKnttlr2+TU1Ny70uqxNO\n9HgcgiO9tWrVCjY2Nrh27Rratm1b7q/sQ6Vt27Y4dOiQzhFo+/fv13kshUIBOzs7+QMVKD2C6ejR\nozrz+fj44Ny5c3BxcSm3vEcFQ0V8fHyQlJQkH6VUEXNzcwQFBeGbb77BgQMHHrn1A5R+43V1d
cW+\nfft0pu/duxdGRkZo3759lWqsTHZ2NvLz8+WDCAoLCwFA5yCMX3/9Vd4KfbC+p/2N3MXFBaampuXW\ned++fWjVqhVUKlWt9cuDyobxKvrhLQA4ODigadOmFdbUpEkTtGrVCgDw/PPP4+rVqzqvkyNHjlSr\npgYNGkCj0ZQ74IdKcQuI9GZkZISZM2di7ty5KCgoQOfOnWFsbIwrV65g//79WL16NUxMTDBmzBiE\nhoZi2rRp6Nu3L86ePSvvV3lQ9+7dsX37drRu3RoODg7YsWOH/MFa5r333kNCQgKGDx+OoUOHwtbW\nFjdu3MDx48fh5eWFoKAgveufMmUK+vfvj6FDh2LEiBFQqVQ4deoUmjZtir59+8rz9e/fHwMHDoS5\nubl8RNfjHnfs2LGYN28eAgMDcebMGaxYsQKhoaGPPGLrUf755x9YWlpCkiRkZWVh3bp1sLS0lNe3\nQ4cOMDMzQ1hYGEaNGoWMjAysWrWq3PJatmyJw4cP48CBA7Czs4OdnV21aypjZWWFYcOGYeXKlVAq\nlXjllVewb98+/Pzzz1i2bJk8X030y6O0atUKb7/9NhYuXIgbN27Aw8MDN2/exIEDB7B06VIYGRlh\n4sSJiIyMhKWlJTp27Ihjx45h+/btmDFjhhzm/v7+WLlyJcLCwtCnTx+kpqZWGmqP07JlSwDAxo0b\n4eXlBQsLiyp/carPGEBUJb1794alpSXWrl2L77//HkqlEi+99BK6du0KY+PSl5O7uzuWLl2K2NhY\nHDhwAO3atUNsbGy531ZMmTIFeXl5iI2NRYMGDTBs2DA4OTnh+++/l+extrbG9u3bERsbi0WLFqGg\noAC2trbw8PCAi4tLlWp3cnLCli1bEBMTg3nz5kGhUODll18ud0oad3d3NG3aFJ07d9ZrmK9Lly5Y\nunQp1qxZg7i4OFhZWWH06NGYPHlylep70OLFi+X/mzZtirZt22LhwoXyFpCtrS0+//xzREVFYfz4\n8WjRogUiIyOxevVqnccZOnQozp49izlz5qCgoABTp07VOcy8uj788EM0aNAAmzdvhlqtRvPmzbF0\n6VKdwK6Jfnmc+fPnw9HRETt37sTatWthbW2Nzp07y+2DBw9GcXExvvnmG3z99dewt7fHnDlz8O67\n78rztG7dGgsWLMDatWuRkpICHx8fLFy4EEOGDKlyPT4+Phg5ciQ2btyImJgY+Pj4YOPGjU9jVesF\nhSTxktxU8woKCvDaa68hOjoaISEhost5pDNnziAkJATffPMNvLy8RJdDVG9xC4jo/6jValy4cAHL\nli1D69atGT5ENYwHIRD9n4MHD2LIkCHIy8vTGQIjoprBITgiIhKCW0BERCQEA4iIiITgQQiVyMu7\nA62Wo5NERPpQKhV47rlHX2jxYQygSmi1EgOIiKgG1doQ3Pvvv4/evXujT58+GDx4ME6fPg0AuHDh\nAgYOHIiAgAAMHDhQ50y+NdFGRESGodaOgrt165Z84sIDBw5g1apV2LVrF4YPH463334bISEhiIuL\nww8//CBfobAm2vSVm3ubW0BERHpSKhWwttb/BMFALW4BPXjW3Nu3b0OhUCA3NxdpaWkIDg4GAAQH\nByMtLQ1qtbpG2oiIyHDU6j6gefPm4ciRI5AkCevWrUNmZibs7Ozki30ZGRnB1tYWmZmZkCTpqbdZ\nWVnpXWtVk5yIiKqmVgNo4cKFAEpPlx4dHY2pU6fW5uKrhENwRET6M+ghuAf16dMHx44dw/PPP4/s\n7GxoNBoApdeDycnJgb29Pezt7Z96GxERGY5aCaA7d+4gMzNTvn3o0CE0adIE1tbWcHV1RUJCAgAg\nISEBrq6usLKyqpE2IiIyHLVyFNyNGzfw/vvv4969e1AqlWjSpAlmzZqFNm3aID09HbNnz0ZBQQEs\nLS0RFRUlX8SpJtr0xSE4IiL9VWcIjicjrQQD6NmmsjBBg
4amostAcWER8m9VfglxIkNRnQDimRCI\nKtCgoSmSho8UXQaCNm0AGEBUT/FkpEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAR\nEQnBACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQERE\nJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJIRxbSwkLy8PM2fO\nxOXLl2FiYoJmzZohMjISVlZWcHFxgbOzM5TK0iyMjo6Gi4sLAODQoUOIjo6GRqNBmzZtsHjxYpiZ\nmT1RGxERGYZa2QJSKBQYPXo0kpOTER8fjxdffBExMTFy+7Zt2xAXF4e4uDg5fO7cuYOPP/4Ya9as\nwf79+9GoUSOsX7/+idqIiMhw1EoAqVQqeHt7y7fd3d2RkZHxyPscPnwYbm5uaN68OQAgNDQUe/fu\nfaI2IiIyHLUyBPcgrVaLrVu3ws/PT542bNgwaDQavPHGG5g8eTJMTEyQmZkJBwcHeR4HBwdkZmYC\nQLXbqsLaunGV70NUE2xsLESXQFQjaj2A5s+fD3NzcwwdOhQA8OOPP8Le3h63b9/GjBkzsGrVKnz4\n4Ye1XVY5ubm3odVKossgQQzpQ//69VuiSyB6LKVSUeUv7rV6FFxUVBQuXbqEZcuWyQcd2NvbAwAa\nN26MAQMG4Pfff5enPzhMl5GRIc9b3TYiIjIctRZAn332GVJTU7Fq1SqYmJgAAG7evInCwkIAQElJ\nCZKTk+Hq6goA6Ny5M/7++29cvHgRQOmBCj179nyiNiIiMhy1MgR37tw5rF27Fs2bN0doaCgA4IUX\nXsDo0aMRHh4OhUKBkpISdOjQAVOnTgVQukUUGRmJcePGQavVwtXVFfPmzXuiNiIiMhwKSZK4o6MC\n3Af0bLOxsUDS8JGiy0DQpg3cB0R1gsHvAyIiIirDACIiIiFq/TBsqjnPNTGBsYmp0BpK7hch7+Z9\noTUQUd3AAKpHjE1M8Vv0aKE1eMxcB4ABRESPxyE4IiISggFERERCMICIiEgIBhAREQnBACIiIiEY\nQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAA\nERGREAwgIiISggFERERCMICIiEgIBhAREQnBACIiIiEYQEREJEStBFBeXh7GjBmDgIAA9OrVC5Mm\nTYJarQYAnDx5Er1790ZAQABGjRqF3Nxc+X410UZERIahVgJIoVBg9OjRSE5ORnx8PF588UXExMRA\nq9VixowZCA8PR3JyMjw9PRETEwMANdJGRESGo1YCSKVSwdvbW77t7u6OjIwMpKamwtTUFJ6engCA\n0NBQ7Nu3DwBqpI2IiAxHre8D0mq12Lp1K/z8/JCZmQkHBwe5zcrKClqtFvn5+TXSRkREhsO4thc4\nf/58mJubY+jQodi/f39tL15v1taNRZdQZ9nYWIguoV5hf1J9VasBFBUVhUuXLmHNmjVQKpWwt7dH\nRkaG3K5Wq6FUKqFSqWqkrSpyc29Dq5WeYG1rn6F8UF2/fkt0CU/MUPoSqB/9SfWfUqmo8hf3WhuC\n++yzz5CamopVq1bBxMQEAODm5obCwkKcOHECALBt2zYEBgbWWBsRERmOWtkCOnfuHNauXYvmzZsj\nNDQUAPDCCy9g1apViI6ORkREBIqKiuDo6IglS5YAAJRK5VNvIyIiw6GQJKlujTPVkro6BPdb9Gih\nNXjMXFcvhoxsbCyQNHyk6DIQtGlDvehPqv8MegiOiIjoQQwgIiISggFERERCMICIiEgIBhAREQnB\nACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEgIBhAREQmhdwCtX7++wukbNmx4\nasUQEdGzQ+8AWrVqV
YXTV69e/dSKISKiZ8djL0h39OhRAIBWq8Uvv/yCBy8fdPXqVTRq1KjmqiMi\nonrrsQE0b948AEBRURHmzp0rT1coFLCxsUFYWFjNVUdERPXWYwPo0KFDAICZM2ciOjq6xgsiIqJn\nw2MDqMyD4aPVanXalEoeTEdERFWjdwCdOnUKkZGROHv2LIqKigAAkiRBoVDg9OnTNVYgERHVT3oH\n0OzZs9G1a1csWrQIDRs2rMmaiIjoGaB3AF27dg0ffvghFApFTdZDRETPCL133vj7++Pnn3+uyVqI\niOgZovcWUFFRESZNmgQPDw80bdpUp41HxxERUVXpHUCtWrVCq1atarIWIiJ6hugdQJMmTarJOoiI\n6BmjdwCVnZKnIh07dnwqxRAR0bND7wAqOyVPmby8PBQXF8POzg4HDx587P2joqKQnJyMa9euIT4+\nHs7OzgAAPz8/mJiYwNTUFAAwffp0dO7cGQBw8uRJhIeHo6ioCI6OjliyZAmsra2fqI2IiAyD3kfB\nHTp0SOfvxIkTGD9+PIYOHarX/bt164Zvv/0Wjo6O5dqWL1+OuLg4xMXFyeGj1WoxY8YMhIeHIzk5\nGZ6enoiJiXmiNiIiMhzVPoeOkZERxo8fj3Xr1uk1v6enJ+zt7fV+/NTUVJiamsLT0xMAEBoain37\n9j1RGxERGQ69h+AqcuTIkafyw9Tp06dDkiR4eHhg2rRpsLS0RGZmJhwcHOR5rKysoNVqkZ+fX+02\nlUqld03W1o2feL2eVTY2FqJLqFfYn1Rf6R1AXbp00Qmbe/fu4f79+4iIiHiiAr799lvY29vj/v37\nWLhwISIjIw1iyCw39za0WunxMxoQQ/mgun79lugSnpih9CVQP/qT6j+lUlHlL+56B9CSJUt0bpuZ\nmaFFixZo3PjJthTKhuVMTEwwePBgTJgwQZ6ekZEhz6dWq6FUKqFSqardRkREhkPvfUBeXl7w8vKC\np6cnmjdvjjZt2jxx+Ny9exe3bpV+u5MkCUlJSXB1dQUAuLm5obCwECdOnAAAbNu2DYGBgU/URkRE\nhkPvLaDbt28jMjISSUlJKCkpgbGxMd566y2EhYXBwuLxwxULFixASkoKbty4gZEjR0KlUmHNmjWY\nPHkyNBoNtFotnJyc5CE9pVKJ6OhoRERE6BxO/SRtRERkOBSSJOm1o2P27Nm4c+cOpk2bBkdHR1y7\ndg2xsbEwMzNDVFRUTddZ6+rqPqDfokcLrcFj5rp6sc/CxsYCScNHii4DQZs21Iv+pPqvRvcB/fvf\n/8aBAwdgZmYGAGjRogUWL14Mf3//qlVJRESEKuwDMjU1hVqt1pmWl5cHExOTp14UERHVf3pvAfXv\n3x+jRo3CiBEj4ODggIyMDGzcuBEDBgyoyfqIiKie0juAJkyYADs7O8THxyMnJwe2trYYPXo0A4iI\niKpF7yG4hQsXokWLFti4cSOSkpKwceNGODk5YeHChTVZHxER1VN6B1BCQgLc3Nx0prm5uSEhIeGp\nF0VERPWf3gGkUCig1Wp1ppX9foeIiKiq9A4gT09PfP7553LgaLVarFixQj7rNBERUVVU6YJ048aN\ng6+vLxwcHJCZmQkbGxusWbOmJusjIqJ6Su8Aev7557Fr1y789ddfyMzMhL29Pdq1awelstqXFCIi\nomdYla4HpFQq4e7uDnd395qqh4iInhHcfCEiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERC\nMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAAERGREAwgIiISggFERERCMICIiEiIWgmgqKgo\n+Pn5wcXFBf/v//0/efqFCxcwcOBABAQEYODAgbh48WKNthERkeGolQDq1q0bvv32Wzg6OupMj4iI\nwODBg5GcnIzBgwcjPDy8RtuIiMhw1EoAeXp6wt7eXmdabm4u0tLSEBwcDAAIDg5GWlo
a1Gp1jbQR\nEZFhqdIluZ+mzMxM2NnZwcjICABgZGQEW1tbZGZmQpKkp95mZWVVpfqsrRs/xbV9ttjYWIguoV5h\nf1J9JSyADF1u7m1otZLoMqrEUD6orl+/JbqEJ2YofQnUj/6k+k+pVFT5i7uwALK3t0d2djY0Gg2M\njIyg0WiQk5MDe3t7SJL01NuIiMiwCDsM29raGq6urkhISAAAJCQkwNXVFVZWVjXSRkREhkUhSVKN\njzMtWLAAKSkpuHHjBp577jmoVCokJiYiPT0ds2fPRkFBASwtLREVFYWWLVsCQI20VUVdHYL7LXq0\n0Bo8Zq6rF0NGNjYWSBo+UnQZCNq0oV70J9V/1RmCq5UAqosYQNXDAHq6GEBUV1QngHgmBCIiEoIB\nREREQjCAiIhICAYQEREJwQAiIiIhGEBERCQEA4iIiIRgABERkRAMICIiEoIBREREQjCAiIhICAYQ\nEREJwQAiIiIhGEBERCQEA4iIiIRgABERkRAMICIiEoIBREREQjCAiIhICAYQEREJwQAiIiIhGEBE\nRCQEA4iIiIRgABERkRAMICIiEoIBREREQhiLLgAA/Pz8YGJiAlNTUwDA9OnT0blzZ5w8eRLh4eEo\nKiqCo6MjlixZAmtrawCodhsRERkGg9kCWr58OeLi4hAXF4fOnTtDq9VixowZCA8PR3JyMjw9PRET\nEwMA1W4jIiLDYTAB9LDU1FSYmprC09MTABAaGop9+/Y9URsRERkOgxiCA0qH3SRJgoeHB6ZNm4bM\nzEw4ODjI7VZWVtBqtcjPz692m0qlqtV1IiKiyhlEAH377bewt7fH/fv3sXDhQkRGRsLf319oTdbW\njYUuvy6zsbEQXUK9wv6k+sogAsje3h4AYGJigsGDB2PChAkYPnw4MjIy5HnUajWUSiVUKhXs7e2r\n1VYVubm3odVKT7hmtctQPqiuX78luoQnZih9CdSP/qT6T6lUVPmLu/B9QHfv3sWtW6VvMEmSkJSU\nBFdXV7i5uaGwsBAnTpwAAGzbtg2BgYEAUO02IiIyHMK3gHJzczF58mRoNBpotVo4OTkhIiICSqUS\n0dHRiIiI0DmcGkC124iIyHAoJEmqW+NMtaSuDsH9Fj1aaA0eM9fViyEjGxsLJA0fKboMBG3aUC/6\nk+q/OjkER0REzyYGEBERCcEAIiIiIRhAREQkBAOIiIiEYAAREZEQDCAiIhKCAUREREIwgIiISAgG\nEBERCcEAIiIiIYSfjLQusLBsiIamDYTWUFhUjFsFhUJrICJ6mhhAemho2gCDZ34rtIYt0UNwCwwg\nIqo/OARHRERCMICIiEgIBhAREQnBACIiIiEYQEREJAQDiIiIhGAAERGREPwdEBHVuCaWJjAxNRVd\nBu4XFeFmwX3RZdD/YQARUY0oWKqDAAAQ7ElEQVQzMTXFZ3PGiS4D0xavBcAAMhQcgiMiIiEYQERE\nJAQDiIiIhGAAERGREAwgIiISot4G0IULFzBw4EAEBARg4MCBuHjxouiSiIjoAfU2gCIiIjB48GAk\nJydj8ODBCA8PF10SERE9oF7+Dig3NxdpaWnYsGEDACA4OBjz58+HWq2GlZWVXo+hVCp0bjd9rtFT\nr7OqHq6pIiaW1rVQyaPpU2ddYNZUfF8C9ac/LVXsz6fFwsIUJiZir9J8/34xbt0qkm9Xp18VkiRJ\nT7MoQ5CamopZs2YhMTFRnhYUFIQlS5agTZs2AisjIqIy9XYIjoiIDFu9DCB7e3tkZ2dDo9EAADQa\nDXJycmBvby+4MiIiKlMvA8ja2hqurq5ISEgAACQkJMDV1VXv/T9ERFTz6uU+IABIT0/H7NmzUVBQ\nAEtLS0RFRaFly5aiyyIiov9TbwOIiIgMW70cgiMiIsPHACIiIiEYQEREJAQDiIiIhGAAERGREAwg\nIiISol6ejJT0c+/ePRw+fBiZmZkASs8g0blzZ5i
bmwuujIieBfwd0DPqxx9/RFhYGNzc3ORTFGVm\nZiI1NRXz589H165dBVf4XxkZGdi3b59OUAYEBMDR0VFwZbpYJxkyQ/zCyQCqIYb+Jg8KCsLq1avR\nrFkznekXL17EhAkTsHfvXkGV6dqxYwdWrlyJ7t276wTlwYMHMXHiRAwYMEBwhaVY59Nn6O+hMnWh\nTkP9wskAqgF14U3eo0cPpKSkVLmttgUEBGDr1q3lzuOnVqsRGhrKOquortRZF95DQN2p01C/cHIf\nUA1Yt24ddu3aVe5NPnHiRISGhhrEi7JNmzYIDw/HwIED4eDgAKD0m9x3330HV1dXwdX9l1arrfAk\nss899xwM6bsT63y66sJ7CKg7dZaUlJQLHwBo3ry5fNUAERhANaAuvMkXL16M9evXY9asWcjIyIBC\noYCDgwMCAgIwZ84c0eXJfH19MXr0aLzzzjs6Qbl9+3Z06tRJcHX/xTqfrrrwHgLqTp2G+oWTQ3A1\n4F//+heuXLlS4Zv8hRdewCeffCK2wDpEq9Viz5492Lt3LzIyMgAADg4OCAwMREhICJRKw/glAet8\nuurKe6iu1FlYWIj169fLz/uDXzjfe+89mJmZCamLAVQD6sqbvDLXr1+HjY2N6DLoGVZX3kN1pU5D\nxQCicvr06YPdu3eLLuOxTp06hTZt2ogu47FYJxkykV84Gc+17NSpU6JLqNR//vMfAKgT4QMAn3/+\nuegS9MI6ny5Dfg89qK7UOWbMGGHL5hZQLRs7diy+/PJL0WXg/Pnz5aa99957+J//+R9IkoRWrVoJ\nqOrx7ty5g4sXL6JZs2Zo3Lix6HLqvHv37iE9PR0vvfQSLC0tRZejF0N5Dz1OXalTJAbQM6p169Zw\ndHTUOVInOzsbdnZ2UCgUOHjwoMDq/is8PBwffPABrKys8Ntvv2Hy5Ml47rnnoFarsWTJEvj6+oou\nEQDg7e2NXr164e233zaow9gftn//fsyaNQu2traIjo7G1KlTYWZmhtzcXCxevBh+fn6iS6RniUS1\nKjg4WHQJkiRJ0ooVK6TRo0dL165dk6d17dpVYEUV69Wrl/z/sGHDpD///FOSJEn6559/pL59+4oq\nq5yuXbtKCxculHx8fKQ+ffpI33zzjZSfny+6rHJCQkKkM2fOSMePH5e8vLyk3377TZIkSTp//rwU\nEhIiuLry1Gq1lJaWJqWlpUlqtVp0OXWWWq2W5s6dK40cOVLavHmzTtukSZMEVSVJ/B1QDahoeKtM\nXl5eLVZSuUmTJiEtLQ3Tpk1DSEgIBg0aBIVCIbqscoqKiuT/79y5g3bt2gEAWrRogeLiYlFlldOk\nSRPMnTsXM2bMwMGDB7Fz504sXboUb775Jvr3729Qv7FxcXEBADRq1AivvvoqAMDJyUlkSeVcvnwZ\nH3/8MdLS0mBrawsAyMnJwSuvvIJ//etfaN68udgC9dCrVy/Ex8eLLgMAEBERgRdeeAFdunTB1q1b\ncfToUSxbtgzGxsa4cuWKsLoYQDUgODi43PBWmfz8fAEVVeyVV17Bpk2bsHz5cowYMcKgPtDLdOzY\nEZ9++immTp0Kb29vJCUlISgoCEeOHIFKpRJdXjkNGjRAYGAgAgMDkZ2djV27dmH+/PnYt2+f6NIA\nAAqFAunp6SgoKMDdu3dx8uRJuLu748KFC0J/Ef+wmTNnYvDgwdiwYYN8KLNWq0V8fDxmzZqF7777\nTnCFperCl02g9JQ7y5cvBwD4+/sjMjIS48aNwxdffCG0Lu4DqgHdunXDli1bYGdnV66tS5cu+Omn\nnwRU9WgnT57E8ePHMXbsWNGl6Lh//z6io6MRFxcHlUqFK1euwNjYGN7e3vjkk0/w4osvii4RQN05\ndP1///d/MWvWLCiVSsTGxuLLL7/E9evXkZWVhU8++QTBwcGiSwQABAYGVhraj2qrbRXtSy2Tk5OD\n1NRUAVWV17N
nz3Lne4uKikJaWhpycnLEnXxY2OBfPfbpp5/KY+sPmz9/fi1XUz/cuXNHOn36tHTq\n1CmD3Bdw9epV0SVUS0lJifT3339L169fF12KjoEDB0rx8fGSVquVp2m1WikuLk4aMGCAwMp0+fn5\nSVlZWRW2vfHGG7VcTeXGjBkjHT9+vNz0pUuXSi4uLgIqKsUtICIyOBcvXkRERAROnz4tjyRkZ2ej\ndevW+OSTT9CyZUvBFZaKioqCv7+/vC/tQQsWLEBYWJiAqsrLz8+HQqFAkyZNyrWdP39e2M8uGEBE\nZLDUarXOdXYqOvEnPRmRB0vwIAQiMlhWVlblQseQji57FEOq01APlmAAEZHBqewDU5Ikgzq6zFA/\n2B9mqEfmMoCIyOAY6gfmw+pKnY6Ojo88MlcUBhARGRxD/cB8WF2ps0ePHrh27VqFdfr7+wuoqBQD\niIgMjqF+YD6srtQ5a9asSttEHqnHo+CIiEgIXg+IiIiEYAAREZEQDCCqc/z8/OSrt1ZFeHg4Vq1a\nBQA4duwY3njjjUrnnT17NmJjYwEAJ06cQEBAQPWKrUUP9suaNWswb948wRU93p49ezBq1KhK24cN\nG4YdO3bUYkVUm3gQAj0zIiMjq3U/T09PJCcnP+Vqatb48ePl/69evYpu3brh1KlTMDZ+/Fv+2LFj\nmDFjBg4fPlyTJQIAevfujd69e9f4csgwcQuIiIiEYABRnfT3338jKCgIr732GubMmYOioiLs3LkT\ngwYN0pnPxcUFly5dAqA7rPawtLQ09O3bFx06dMAHH3ygcyG8h4fr/Pz8sH79evTq1QseHh7l5v/q\nq6/g6+sLX19f7NixQ6eGvLw8jB8/Hq+++ir69++PZcuWyTVfvXoVLi4uKCkpkR/rwSGoy5cvY/jw\n4fD29oa3tzc++ugjFBQUVLg+K1aswPTp0wEAQ4cOBQC89tpr6NChA44fPw4vLy+cPXtWnj83Nxft\n27fHtWvXMGbMGOTk5KBDhw7o0KEDsrOz0b59e51f9p86dQo+Pj4oLi7Gzp07ERoaisjISHh4eCAw\nMBBHjx6V57116xbmzp0LX19fdO7cGbGxsfK1hx5+zo4cOYLAwEB4eHggMjKywh94Uv3BAKI6KT4+\nHuvXr8f+/ftx4cKFJ7qw1v379zFx4kSEhITg+PHjCAwMREpKyiPvs3fvXqxbtw4HDx7E2bNnsXPn\nTgDA4cOHsXHjRmzYsAH79+/HsWPHdO4XGRkJU1NT/Pzzz1i0aBF++OEHveuUJAnjxo3Dv//9b+zd\nuxdZWVlYsWLFY++3efNmAMCvv/6KP/74A15eXggKCsKePXvkeRISEtCxY0c4Ojriq6++gq2tLf74\n4w/88ccfsLOzg5eXl841Y+Li4vDWW2+hQYMGAIC//voLL730En755RdMmTIFkyZNks8EMHv2bBgb\nGyMlJQW7d+/GkSNHKtyvo1arMWnSJHzwwQf45Zdf8NJLL+H333/Xu3+o7mEAUZ00ZMgQ2NvbQ6VS\nYcKECUhMTKz2Y/35558oLi7Gu+++K1/RtG3bto+8z7Bhw2BnZweVSoWuXbvi9OnTAEqDqV+/fnj5\n5ZdhZmaGyZMny/fRaDRISUnBlClTYG5uDmdnZ/Tt21fvOps1a4ZOnTrBxMQEVlZWGDlyJH799ddq\nrXPfvn2RmJgob2HExcU9cl9M37595cDSaDRITExESEiI3G5lZSX3X1BQEFq0aIEff/wRN27cwE8/\n/YS5c+fC3Nwc1tbWGDFiRIXP1+HDh/Hyyy8jMDAQDRo0wLvvvoumTZtWa/2obuBBCFQn2dvby/87\nODggJyen2o+Vk5MDOzs7KBQKncd8FBsbG/l/MzMzefk5OTlwc3OrsE61Wo2SkpJytevrxo0bWLhw\nIU6cOIE7d+5AkiRYWlrqff8HtW/fHg0bNsSxY8dgY2ODy5cvo1u3bpXO361bN
0RERODKlSu4cOEC\nGjdujHbt2sntFfVfTk4OMjIyUFJSAl9fX7lNq9Xq9EGZnJwcPP/88/JthUJR4XxUfzCAqE4qu0YM\nAGRkZMDW1hZmZmYoLCyUp1+/fl2vx7KxsUF2djYkSZI/RDMyMqp1uW9bW1tkZ2dXWKeVlRWMjY2R\nmZkJJyencu3m5uYAgMLCQjRu3LjcOnz22WdQKBSIj4+HSqXCgQMH9Dqy78FgeFDZVo2NjQ0CAgJg\nampa6fympqbo2bMn9uzZg3/++Udn6wdAuf7LzMyEn58fnn/+eZiYmOCXX3557BF4NjY2yMrKkm9L\nkqTTP1T/cAiO6qQtW7YgKysL+fn5WLNmDYKCgtC6dWucO3cOp0+fRlFRkV77RwDA3d0dxsbG2LRp\nE4qLi5GSkoK///67WnUFBgZi586dSE9Px71793T2TRkZGcHf3x8rV67EvXv3cP78eezatUtut7Ky\ngp2dHeLi4qDRaPD999/jypUrcvudO3dgbm4OCwsLZGdnY926dXrVZGVlBaVSqfNYQOkh0AcOHMCe\nPXvQp08febq1tTXy8/Nx69YtnflDQkKwa9cuHDp0qFwAqdVquf/27t2L9PR0dOnSBba2tujUqRM+\n/fRT3L59G1qtFpcvX8bx48fL1dmlSxecO3cOKSkpKCkpwaZNm3Djxg291pHqJgYQ1UnBwcEYNWoU\nunfvjpdeegkTJkxAixYtMHHiRIwYMQI9evSAh4eHXo9lYmKCFStWYNeuXfDy8kJSUlK1TyTZpUsX\nDBs2DMOHD4e/vz/at28vLwMo/THs3bt30alTJ8yePRv9+vXTuf/8+fOxfv16eHt74/z58+jQoYPc\nNmnSJKSlpcHT0xNjx45Fjx499KrJzMwM48ePx6BBg+Dp6YmTJ08CKB0efOWVV6BQKODp6SnP7+Tk\nhLfeegvdu3eHp6envEXn4eEBpVKJNm3awNHRUWcZ7dq1w6VLl+Dj44Nly5Zh+fLleO655wAA0dHR\nKC4ulo9anDJlSoVbp1ZWVvj888+xdOlSeHt749KlSxVe6prqD56MlKgGpaenIzg4GH///XeFQ1A7\nd+7Ejh07sHXrVgHVAXPmzIGtrS0+/PBDveYfPnw4evXqhQEDBsjTRK8D1V3cAiJ6yvbv34/79+/j\n5s2bWLJkCbp27arXGQhq29WrV7F//370799fr/n/+usvpKWloWfPnjVcGT0rGEBET9m2bdvQsWNH\n+Pv7w8jICJ988onokspZtmwZevXqhffee0+vgy1mzZqFkSNHYu7cufIBEkRPikNwREQkBLeAiIhI\nCAYQEREJwQAiIiIhGEBERCQEA4iIiIT4/9XIitKxsMjJAAAAAElFTkSuQmCC\n",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "KOHPCFRSp5y9",
-        "colab_type": "code",
-        "outputId": "e0f3fe2e-a82a-49e8-a798-a3f79a30bcee",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 274
-        }
-      },
-      "source": [
-        "# Building quality type against year built, one red marker per row.\n",
-        "plt.plot(pd_train.yearbuilt, pd_train.buildingqualitytypeid, 'ro')\n",
-        "# Label the axes so the figure can be read without the code next to it.\n",
-        "plt.xlabel('yearbuilt')\n",
-        "plt.ylabel('buildingqualitytypeid')\n",
-        "plt.title('Building quality type vs. year built')\n",
-        "# display the graph\n",
-        "plt.show()"
-      ],
-      "execution_count": 188,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEBCAYAAACQbKXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGxtJREFUeJzt3WtwE+ehBuB3JUXGxqi2ZAPmmmYa\nU2caQusMZtwUgiGYNkBaM1NSLk4GEtqmpNCGSQlNSxoIRCWhYQKJSQ8tIcnAH3toAz2FdLikQKHQ\nXKgzFFNjwDPgiyRzjIkvSPrOD5DiiyTrstLu+nufX7Cr/fbd1eqVWC1aRQghQEREUjFpHYCIiFKP\n5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQS\nYvkTEUnIonWAlpYb8PsFHI5MuN1tWseJC7Nrg9lTz6i5gYGT3WRSkJ09OOExNS9/v1/A7xfBPxsV\ns2uD2VPPqLkBZu+Op32IiCTE8icikhDLn4hIQlGVv9PpRElJCcaNG4eamhoAQEtLC5588kmUlpZi\n9uzZWLZsGTweT1LDEhGROqL6wnfatGkoLy/HggULgtMURcETTzyBoqIiALfeIF555RWsX78+OUkp\n6VpPHIerqhJejxswmQC/Hxa7Azllc2GbVNz3MQEmE2yTp2D4wscijtl7rHizBcbJnV0a97YmItpt\nCrmvbqsJMW5grGvH/o6Os2dDzutvPRa7Axnjx+PzM2d65AMQ9rntPi8w7fP/nkfrh0cAv7/P89vw\n7ttfzOtNUYDetwW/vT5TZiaEEBA3bgSnxcxkwqBx4+BtbILX44aSlgbR2RnVfmwYfy+6bnr77FtV\nmM2Az9cjZ7jXhF4osdzAvaSkBBUVFcjPz+8zb//+/di1axd27NgRUwC3uw1+v0Bu7hA0N1+PaVm9\nGAjZW08cR+POHRBdXX0eo1itGFb+OACEfQwA2B6c2uNgDzVmYKxY3gDCjfOVZT+Gcs/Xox5HDdFu\nU6T9GVGo8kx0PWYzAAXweaObFyaD7cGpSE+3ovF/90e5MRqKsB9TqfdrIl7dO8ZkUuBwZCY8pirn\n/P1+P3bt2oWSkhI1hiMNuKoqwxaI6OqCq6oy4mMA3Po02M+YgbESzSa6unD5nfdiGkcN0W5Tf/sq\nrAiFFfd6fL7QxR9uXpgMrR8eQeP+DyKvSy90UPxA39eEnqhynf/atWuRkZGBhQsXxrxs93ew3Nwh\nasTRhNGz17RE/r7G2898AIDf32M/hBvT2+KJaX+FG6fT5U75fo92m/rbn/FK1XpCiuc0jex6vSYS\nofaxnnD5O51OXLp0CRUVFTCZYv+HBE/7aCuQ3ZJtD3luOsCSbQeAiI+BydRjP4Qb05Jtj2l/hRsn\nLceR8v0e7Tb1tz8TWX8q1hNS4PXNN4Ho9XpNxEt3p302bdqE6upqbN26FVarNeEwpJ2csrlQwjyH\nitWKnLK5ER8DALbJU/odMzBWotkUqxVjFi0Is0TyRLtN/e2rsBQl/Kx412M2A+Ywn/NCzQuTwTZ5\nCoaVPhR5XXoRYT+mUu/XhJ6YX3jhhRf6e9C6devw/PPPo6mpCfv370dVVRUmTpyIZ555BnfccQf2\n7NmD3bt34+jRo3j44YdjCtDe3gUhgMGD0/D553GcI9WBgZA9bdRo3OFwoOPiRfjb2299yhMCFrsD\nQx+dD9uk4r6PCTCZYJvyYJ8vtno/vvtYsQg3zugZ01K+36PdprD7KgKL3YGhCxbC2/45vC5X33lR\nrMdidyCzaBJ8rde/yDd/ATK//vXQz22veYEMJpsNnZcv3zp33u35HT2lGP/X0PzFvN5Cle7t9Zky\nM4E77gBu3gxOi5nJhEFf/Sr
gF/C3t0NJS+t5lU2E/Wgbfy8Uu6PPvlWF2dxze8K8JuLVvWMURUFG\nRuIftmO62icZeNpHW8yuDaNmN2puYOBk18VpHyIiMiaWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFT5SWciolipdZc3ig/Ln4hSrvcdyLweNxp37gAAvgGkCE/7\nEFHKqXWXN4ofy5+IUi7cDWhSdmMaYvkTUepZ7I6YppP6WP5ElHJq3eWN4scvfIko5QJf6vJqH+2w\n/IlIE7ZJxSx7DfG0DxGRhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJCGWPxGR\nhPotf6fTiZKSEowbNw41NTXB6XV1dZg3bx5KS0sxb948XLx4MZk5iYhIRf3+vMO0adNQXl6OBQsW\n9Ji+Zs0azJ8/H4888gj+9Kc/4de//jV27tyZtKBaa3j3bbR+eATw+wGTCbbJU5DxlbvhqqpEjccN\nmEyA35/U3yjpfuejIEUBrFagszOqDL3HqOnziNQJ5Gx4+4/AzZsxLx8u+6CCAox55hd9pl9+1YmO\ns2e/mJCeDrS3x7xeNWi53xNh1NyAzrKnpyP/9Tc1jaAIIUQ0DywpKUFFRQXy8/PhdrtRWlqKkydP\nwmw2w+fzoaioCAcOHIDdbo8pgNvdBr9fIDd3CJqbr8e1EcnW8O7baD18qO8MRQFC7D7FasWw8sdV\nfQPofeej/oTKEOsYRtb7DaBP8RNpLYY3gO79aDIpcDgyE159XOf8r169imHDhsFsNgMAzGYzhg4d\niqtXryYcSI9aPzwSekaY981k3JEo1J2PIgmVIdYxjKx30bP4SXc0+ldngOa/6tn9HSw3d4iGScKr\n8ftjXsbb4lF1e2paPAlniGcMI+ux7RrmIAonlo5Qux/jKv+8vDw0NjbC5/MFT/s0NTUhLy8v5rGM\ncNoncC49FpZsu6rbY8m2x3yLu94Z4hnDyHR7PBHdFu0xqpvTPg6HAwUFBdi7dy8AYO/evSgoKIj5\nfL9R2CZPCT1DUUJPTsIdiULd+SiSUBliHcPIBhUURPw7kebS0zVdfb9f+K5btw4HDhyAy+VCdnY2\nsrKysG/fPtTW1mLVqlVobW2FzWaD0+nEXXfdFXMAQ3zyR+SrfbwGvtpHS4le7ROOEa72IcnFeLVP\nMj75R321T7IYpfwjYXZtMHvqGTU3MHCya3rah4iIjI3lT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0Qk\nIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMR\nSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIZY/EZGELIkO\ncOjQIWzevBlCCAghsGzZMsyYMUONbERElCQJlb8QAs8++yzee+895Ofn4z//+Q9+8IMfYPr06TCZ\n+I8KWbSeOA5XVSW8HjcsdgdyyubCNqlY61hxGUjbYiQDab+H2hYAutu+hD/5m0wmXL9+HQBw/fp1\nDB06lMUvkdYTx9G4cwdEVxcAwOtxo3HnDgDQ/OCO1UDaFiMZSPs91LY0/PEPAATg8wWn6WH7Empp\nRVHw2muv4amnnsLUqVPxk5/8BE6nU61sZACuqsrggR4gurrgqqrUKFH8BtK2GMlA2u+htgU+b7D4\nA/SwfQl98vd6vdi2bRveeOMNFBYW4l//+hdWrFiBffv2YfDgwVGN4XBkBv+cmzskkTiakjV7TYsn\n5HRviycl+0TNdaR6W4x6zKidO5X7Pdn7PNy2hBLr9qmdPaHyP3v2LJqamlBYWAgAKCwsRHp6Ompr\nazF+/PioxnC72+D3C+TmDkFz8/VE4mhG5uyWbDu8HnfI6cneJ2rv91Rui1GPmWTkTtV+T8U+D
7ct\n4R4bbZ7u2U0mpceH5ngldNpn+PDhaGhowIULFwAAtbW1cLvdGDNmTMLByBhyyuZCsVp7TFOs1uCX\nXEYykLbFSAbSfg+1LTBbALO5xyQ9bF9Cn/xzc3PxwgsvYPny5VAUBQCwfv16ZGVlqRKO9C/whZXe\nrmSIx0DaFiMZSPs93LaEmqb19ilCCKFlAJ720Raza8Oo2Y2aGxg42XVx2oeIiIyJ5U9EJCGWPxGR\nhFj+REQSYvkTEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9EJKGEb+aSajVP/xhob+8xLfBbGZ//\n9zxaPzwC+P2AyQTb5CloPXwo+ZmSvobkYXZtGDW7UXMDGmZXFEAIwGQC/H7+tk9ALL/tE6r4gwI7\nmIhI5xSrFcPKH4/6DYC/7ROu+AEWPxEZhh7u5GWs8iciGiCivelLsrD8iYg0YLE7NF2/sco/PT38\nvNs3kyEi0js93MnLUOWf//qbId8ALHYHhi95ErYHp976Rh24dbXPg1NTnJCIqJfAB9Pb3WSxO2L6\nsjdZDHW1j14xuzaYPfWMmhsYONnlvNqHiIhUwfInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+I\nSEIsfyIiCbH8iYgklPDNXDo7O7F+/Xr84x//QFpaGiZMmIC1a9eqkY2IiJIk4fLfuHEj0tLSsH//\nfiiKApfLpUYuIiLDaz1xHK6qSng9bt3cwSsgofK/ceMG9uzZgyNHjkC5/eNFOTk5qgQjIjKy1hPH\n0bhzB0RXF4Bbv9/fuHMHAOjiDSChc/719fXIysrCli1bUFZWhkWLFuH06dNqZSMiMixXVWWw+AP0\ncAevgIQ++ft8PtTX1+Oee+7BL37xC3z66af40Y9+hA8++ACZmdH96lz3X6fLzR2SSBxNMbs2mD31\njJobSG32mhZPyOneFk9cOdTOnlD55+XlwWKxYNasWQCA++67D9nZ2airq8O9994b1Rj8SWdtMbs2\njJrdqLmB1Ge3ZNtD3qrRkm2POYfuftLZbrejqKgIx44dAwDU1dXB7XZj7NixCQcjIjKynLK5UKzW\nHtP0cAevgISv9vnNb36D1atXw+l0wmKx4Le//S1sNpsa2YiIDCvwpe6AvNoHAEaPHo133nlHjSxE\nRAOKbVKxbsq+N/4PXyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIi\nCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+I\nSEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpKQauW/ZcsWjBs3DjU1NWoNSURE\nSWJRY5DPPvsMn3zyCUaOHKnGcDFrPXEcrqpKeD1uWOwO5JTNhW1Scb/z4h3z8qtOdJw9G3xsj7c7\nRQGsVqCzEzCZAL8/uDyA4JiBeVoz8ls1s6eeUXMD+s2e/z87NFlvwuXf1dWFF198Ea+++irKy8vV\nyBST1hPH0bhzB0RXFwDA63GjceeO4Pxw8yK9AUQa89qxv/co/j6EuFX8QLDcvR43Gv74BwAC8Pl6\nzCMiudU88bgmbwAJl//mzZsxZ84cjBo1So08MXNVVQZLOkB0dcFVVRn8c6h5kco/0phejzu+oD5v\nfMsRESVBQuX/8ccfo7q6GitXrox7DIcjM/jn3NwhMS9f0+IJOd0bZnpgXqR1xTMmEVG8oum+ePox\nkoTK/9SpU6itrcW0adMAAA0NDViyZAk2bNiABx54IKox3O42+P0CublD0Nx8PeYMlmx7yE/jlmw7\nAISdF2ldkcaM+5M/EVEY/XVf9340mZQeH5rjldDVPkuXLsXRo0dx8OBBHDx4EMOHD8f27dujLn41\n5JTNhWK19pimWK3IKZsbcV68Yw4qKIgvqNkCmM3xLUtEp
DJVrvbRUuDcfaQremK92ifSmLZJxX2u\n9unBYFf7EJG2tLraRxFCCE3WfFuip330gNm1weypZ9TcwMDJrovTPkREZEwsfyIiCbH8iYgkxPIn\nIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8\niYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIsfyIiCbH8iYgkxPInIpIQy5+ISEIs\nfyIiCbH8iYgkZElk4ZaWFjz77LO4fPkyrFYrxo4dixdffBF2u12tfERElAQJffJXFAVPPPEE9u/f\nj/fffx+jR4/GK6+8olY2IiJKkoTKPysrC0VFRcG/T5gwAVeuXEk4FBERJZcihBBqDOT3+7F48WKU\nlJSgvLxcjSGJiChJEjrn393atWuRkZGBhQsXxrSc290Gv18gN3cImpuvqxUnpZhdG8yeekbNDQyc\n7CaTAocjM+ExVSl/p9OJS5cuoaKiAiYTLyAiItK7hMt/06ZNqK6uxltvvQWr1apGJiIiSrKEyv/8\n+fPYtm0b7rzzTjz66KMAgFGjRmHr1q2qhCMiouRIqPzvvvtunDt3Tq0sRESUIjxBT0QkIZY/EZGE\nWP5ERBJi+RMRSYjlT0QkIZY/EZGEWP5ERBJi+RMRSYjlT0QkIdV+1ZNuaT1xHK6qSng9bljsDuSU\nzYVtUnHMy2WMH4/Pz5wJOU73x5oyMyGEgLhxo9/lQq2vxuMGTCbA71d9XyRbTZjpSloaRGcnLHYH\nLMOGouPcOd1tX7jsemfU3IAOs5tMsE2eguELH9Nk9ar9nn+8BtJPOreeOI7GnTsgurqC8xSrFcPK\nH4/4BhBqud4C4wDo97Ghluu+/mjWR0SpYXtwar9vAMn4SWee9lGRq6qyT6GKri64qipjXq63wDjR\nPLa/9cc6BhElT+uHRzRZL0/7qMjrccc0Pdr5sT6uv+XiHYeIkkCjU5L85K8ii90R0/Ro53d/XLSP\njTR+PGMQUZJodAMslr+KcsrmQul1QxvFakVO2dyYl+stME40j+1v/bGOQUTJY5s8RZP18rSPigJf\nqsZ6tU+o5fq7aieRq316r8+oV/uEY4SrfYh4tc8AutrHiJhdG0bNbtTcwMDJzqt9iIgobix/IiIJ\nsfyJiCTE8icikpDmV/uYTErIPxsNs2uD2VPPqLmBgZFdrW3Q/GofIiJKPZ72ISKSEMufiEhCLH8i\nIgmx/ImIJMTyJyKSEMufiEhCLH8iIgmx/ImIJMTyJyKSUFLK3+l0oqSkBOPGjUNNTU1w+qFDh/Dd\n734XjzzyCObMmYMDBw5ENa+urg7z5s1DaWkp5s2bh4sXLyYjdsTshw8fxve+9z3Mnj0bCxcuRH19\nfVT59Jy9paUFTz75JEpLSzF79mwsW7YMHo8nuNwnn3yCOXPmoLS0FIsXL4bbnbx7/8az3wO2bNnS\nZzm9Z+/s7MSaNWswY8YMzJ49G7/61a+C8/R8zAD6eK1GOnYjPffxztM6e11dHRYtWoSZM2di1qxZ\neO6559DR0REc8+DBg5g5cyYeeughrFixAu3t7f0HEUlw6tQpceXKFTF16lRx7tw5IYQQfr9f3H//\n/cG/nz17VkyYMEH4fL6I84QQYtGiRWLPnj1CCCH27NkjFi1alIzYYbNfu3ZNTJw4UVy4cCGYYfHi\nxcFlIuXTc/aWlhZx4sSJ4PIvv/yyeO6554QQQvh8PjF9+nRx6tQpIYQQW7duFatWrdJN9oDq6mqx\nZMmSHssZIfvatWvFSy+9JPx+vxBCiObm5uA8PR8zenmthjt2Iz338c7TQ/b6+nrx2WefBbMuX75c\nbNmyRQghRFtbmyguLhZ1dXVCCCFWr14tXn/99X5zJKX8A3qX/8SJE8Xp06eFEEL885//FDNmzOh3\nnsvlEoWFhcLr9Qohh
PB6vaKwsFC43e5kRu+R/dNPPxXf+c53gvNaWlpEfn6+cLvdEfPpPXtvf/3r\nX8Vjjz0WXO7hhx8OznO73WLChAlJzS1EbNk7OzvF97//fVFfX99nOT1nb2trE4WFhaKtra3PGHo/\nZvT4WhXii2M30nMf7zw9ZO9t+/btYvXq1UIIIf7yl7+IpUuXBuedOXOmx/MXTsp+1VNRFLz22mt4\n6qmnkJGRgRs3buCtt97qd97Vq1cxbNgwmM1mAIDZbMbQoUNx9epV2O32lGT/8pe/DJfLhTNnzmD8\n+PF4//33g9mEEGHzRZqnh+zdM/j9fuzatQslJSXB+SNGjAjOt9vt8Pv9uHbtGrKysnSRffPmzZgz\nZw5GjRrVYzm9ZzebzcjKysKWLVtw8uRJDB48GMuXL8f999+v++Pdbrfr7rXa/diN9NzHOy+Zx0y0\n2btn6OjoQGVlJX7+858D6Hu8jxgxAlevXu133Sn7wtfr9WLbtm144403cOjQIbz55ptYsWIFbty4\nEXGeHgwZMgS/+93vsGHDBpSVlcHtdsNmswUPcj2LNvvatWuRkZGBhQsXapS0r0jZP/74Y1RXV2P+\n/PlaxwwpUnafz4f6+nrcc889qKqqwsqVK/H000+jra1N69gAImfX42tVj8dutGLN7vV68bOf/QyT\nJk3CtGnTElp3yj75nz17Fk1NTSgsLAQAFBYWIj09HbW1tVAUJey8kSNHorGxET6fL/jCaWpqQl5e\nXqqiAwCKi4tRXFwMAHC5XNi+fTvGjBmD9vb2sPmEELrOHuB0OnHp0iVUVFTAZLr1eSAvLw9XrlwJ\nPsbj8cBkMqXsk3N/2d99913U1tYGXwANDQ1YsmQJNmzYoPvsHR0dsFgsmDVrFgDgvvvuQ3Z2Nurq\n6jBixAhdHzORXsdavFZ7H7uRnvt45+khOwD4fD6sXLkSX/rSl/D8888HH5eXl4eTJ08G/37lypWo\n9nnKPvkPHz4cDQ0NuHDhAgCgtrYWbrcbY8aMiTjP4XCgoKAAe/fuBQDs3bsXBQUFKfsncEBzczOA\nW/9M27RpEx599FFkZGREzKf37ACwadMmVFdXY+vWrbBarcFlvva1r6GjowOnT58GAOzevRszZ85M\nae5I2ZcuXYqjR4/i4MGDOHjwIIYPH47t27fjgQce0H12u92OoqIiHDt2DMCtKzncbjfGjh2r+2NG\nT6/VUMdupOc+3nl6yO73+7Fq1SqYzWa89NJLUJQvbujyrW99C//+97+DV1bt3r0b3/72t/vNkJSb\nuaxbtw4HDhyAy+VCdnY2srKysG/fPvz5z3/G73//+2Dwn/70p5g+fToARJxXW1uLVatWobW1FTab\nDU6nE3fddZfasSNm/+Uvf4mPPvoIN2/exDe/+U2sXr0aaWlp/ebTc/bz589j1qxZuPPOOzFo0CAA\nwKhRo7B161YAwEcffYQ1a9ags7MTI0eOxMaNG5GTk6OL7L2VlJSgoqIC+fn5hsheX1+P1atX49q1\na7BYLFixYgWmTJkCQN/HDKCP12qkYzfScx/vPK2zHz58GD/84Q+Rn58f/Nf5N77xDaxZswYA8Le/\n/Q0bN26E3+9HQUEBXn755eAHvHB4Jy8iIgnxf/gSEUmI5U9EJCGWPxGRhFj+REQSYvkTEUmI5U9E\nJCGWPxGRhFj+REQS+n9YnE5sVgm99QAAAABJRU5ErkJggg==\n",
-            "text/plain": [
-              "<Figure size 432x288 with 1 Axes>"
-            ]
-          },
-          "metadata": {
-            "tags": []
-          }
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "_647tI5Lp94v",
-        "colab_type": "text"
-      },
-      "source": [
-        "### Final adjustments\n",
-        "- filling nans"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ofZIC0EdKJ0Y",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: test ready-----\n",
-        "- converting to pandas \n",
-        "  - to see what's going on\n",
-        "    - figuring out what can and what can't be replicated in cuML"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "-4A3-sjRp8AE",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "from sklearn import neighbors\n",
-        "# from cuml.preprocessing.model_selection import train_test_split\n",
-        "from sklearn.model_selection import StratifiedKFold,GridSearchCV,train_test_split\n",
-        "# Location seems to be related to building quality, so a KNN classifier is used to fill in its missing values.\n",
-        "\n",
-        "def fillna_knn(df, base, target):\n",
-        "    \"\"\"Predict the missing values of `target` from the `base` columns with KNN.\n",
-        "\n",
-        "    Rows where `target` is present are split 80/20 (stratified on `target`).\n",
-        "    A 3-fold grid search over `n_neighbors` (euclidean metric, distance\n",
-        "    weights, `f1_weighted` scoring) is fit on the 80% part and refit on it\n",
-        "    with the best setting; that model is scored on the held-out 20% and then\n",
-        "    used to predict `target` for the rows where it is missing.\n",
-        "\n",
-        "    Args:\n",
-        "        df: DataFrame containing the `base` and `target` columns.\n",
-        "        base: list of feature column names.\n",
-        "            NOTE(review): presumably these must be free of NaNs for the\n",
-        "            scikit-learn estimator - confirm against the caller.\n",
-        "        target: name of the column whose missing values are predicted.\n",
-        "\n",
-        "    Returns:\n",
-        "        The predictions for the rows of `df` where `target` is null, in row\n",
-        "        order (an empty array when nothing is missing). `df` is not modified.\n",
-        "    \"\"\"\n",
-        "    data_colnames = [target] + base\n",
-        "    missing_values_boolflag = df[target].isnull()  # True for rows where target is missing\n",
-        "    number_of_missing_val = missing_values_boolflag.sum()\n",
-        "    print(\"# of miss\",number_of_missing_val)\n",
-        "    if number_of_missing_val == 0:\n",
-        "        # Nothing to predict: hand back the (empty) selection instead of fitting a model.\n",
-        "        return df.loc[missing_values_boolflag, target].values\n",
-        "\n",
-        "    # Only rows with a known target can be used to fit and validate the classifier.\n",
-        "    not_missing_rows = df.loc[~missing_values_boolflag, data_colnames]\n",
-        "    Y = not_missing_rows[target]\n",
-        "    X = not_missing_rows[base]\n",
-        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,\n",
-        "                                                        test_size=0.20,\n",
-        "                                                        random_state=3192,\n",
-        "                                                        stratify=Y)\n",
-        "    param_grid = dict(metric=['euclidean'],\n",
-        "                      weights=['distance'],\n",
-        "                      n_neighbors=[5,10,15,20,25])\n",
-        "    # random_state only matters when shuffle=True; it is left out because recent\n",
-        "    # scikit-learn releases reject it together with shuffle=False.\n",
-        "    cv = StratifiedKFold(n_splits=3,shuffle=False)\n",
-        "    grid = GridSearchCV(neighbors.KNeighborsClassifier(n_jobs=-1),\n",
-        "                        param_grid=param_grid,\n",
-        "                        cv=cv,\n",
-        "                        scoring='f1_weighted',\n",
-        "                        refit=True,\n",
-        "                        return_train_score=True,\n",
-        "                        verbose=1,\n",
-        "                        n_jobs=-1,\n",
-        "                        pre_dispatch='n_jobs')\n",
-        "    grid.fit(X_train ,Y_train)\n",
-        "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
-        "    print(\"grid.best_params_\",grid.best_params_)\n",
-        "    print(\"grid.scorer_\",grid.scorer_)\n",
-        "    # Report how the refit model does on the 20% it never saw (same f1_weighted scorer).\n",
-        "    print(\"held-out score\",grid.score(X_test, Y_test))\n",
-        "\n",
-        "    return grid.predict(df.loc[missing_values_boolflag, base])"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "6eES-hq--NKZ",
-        "colab_type": "code",
-        "outputId": "2bc86856-507d-47bf-cfab-d29649cba819",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 903
-        }
-      },
-      "source": [
-        "# Restore the working frame from the `test` snapshot so this section can be re-run.\n",
-        "# NOTE(review): `test` has to exist in the kernel already; the line that would create it\n",
-        "# is commented out just below - confirm the snapshot is taken earlier in the notebook.\n",
-        "# test = df_train.copy()\n",
-        "df_train = test.copy()\n",
-        "# switch to pandas (figuring out what's going on)\n",
-        "df_train = df_train.to_pandas()\n",
-        "\n",
-        "# info() prints its own report and returns None, so it is not wrapped in print().\n",
-        "df_train.info()"
-      ],
-      "execution_count": 191,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "<class 'pandas.core.frame.DataFrame'>\n",
-            "RangeIndex: 90275 entries, 0 to 90274\n",
-            "Data columns (total 45 columns):\n",
-            "parcelid                                 90275 non-null int64\n",
-            "logerror                                 90275 non-null float64\n",
-            "ac_id                                    28781 non-null float64\n",
-            "basement_sqft                            90275 non-null float64\n",
-            "total_bath                               89110 non-null float64\n",
-            "bedroomcnt                               88854 non-null float64\n",
-            "buildingqualitytypeid                    57364 non-null float64\n",
-            "deck_flag                                90275 non-null float64\n",
-            "finished_living_area_entryfloor_sqft2    6856 non-null float64\n",
-            "total_finished_living_area_sqft          89614 non-null float64\n",
-            "finished_living_area_entryfloor_sqft1    6856 non-null float64\n",
-            "fips                                     90275 non-null float64\n",
-            "fireplace_count                          90275 non-null float64\n",
-            "full_bath                                89093 non-null float64\n",
-            "garagecarcnt                             29937 non-null float64\n",
-            "garage_sqft                              21017 non-null float64\n",
-            "has_hottub_or_spa                        90275 non-null int64\n",
-            "heating_system_id                        56080 non-null float64\n",
-            "latitude                                 90275 non-null float64\n",
-            "longitude                                90275 non-null float64\n",
-            "lot_area_sqft                            80125 non-null float64\n",
-            "pool_count                               90275 non-null float64\n",
-            "pool_sqft                                90275 non-null float64\n",
-            "just_hottub_or_spa                       90275 non-null float64\n",
-            "pool_with_spa_tub_yes                    90275 non-null float64\n",
-            "pool_with_spa_tub_no                     90275 non-null float64\n",
-            "propertylandusetypeid                    90275 non-null float64\n",
-            "roomcnt                                  88859 non-null float64\n",
-            "basement_flag                            90275 non-null float64\n",
-            "half_bath                                89093 non-null float64\n",
-            "unitcnt                                  90275 non-null float64\n",
-            "patio_sqft                               90275 non-null float64\n",
-            "storage_sqft                             90275 non-null float64\n",
-            "yearbuilt                                89519 non-null float64\n",
-            "numberofstories                          20581 non-null float64\n",
-            "fireplaceflag                            90275 non-null bool\n",
-            "structure_tax                            89895 non-null float64\n",
-            "total_parcel_tax                         90274 non-null float64\n",
-            "land_tax                                 90274 non-null float64\n",
-            "total_property_tax_2016                  90269 non-null float64\n",
-            "taxdelinquencyflag                       90275 non-null int64\n",
-            "taxdelinquencyyear                       90275 non-null float64\n",
-            "transaction_month                        90275 non-null int16\n",
-            "census_tractnumber                       90275 non-null object\n",
-            "block_number                             90275 non-null object\n",
-            "dtypes: bool(1), float64(38), int16(1), int64(3), object(2)\n",
-            "memory usage: 29.9+ MB\n",
-            "None\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "AT8Osn51lD9v",
-        "colab_type": "code",
-        "outputId": "8ab0690a-2e06-468e-b7ce-f4d051a3ce83",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 573
-        }
-      },
-      "source": [
-        "# Snapshot before imputing: frame size, missing/observed quality ids, first rows.\n",
-        "print('CURRENT DF SITUATION', end='\\n\\n')\n",
-        "print('SHAPE =', df_train.shape)\n",
-        "print(f'NULL COUNT = {df_train.buildingqualitytypeid.isnull().sum()}',\n",
-        "      'VALUE COUNTS',\n",
-        "      df_train.buildingqualitytypeid.value_counts(),\n",
-        "      '',\n",
-        "      sep='\\n')\n",
-        "print('BUILDINGTYPEID HEAD', df_train.buildingqualitytypeid.head(), '', sep='\\n')\n",
-        "print('DF TRAIN HEAD', df_train.head(), sep='\\n')"
-      ],
-      "execution_count": 192,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "CURRENT DF SITUATION\n",
-            "\n",
-            "SHAPE = (90275, 45)\n",
-            "NULL COUNT = 32911\n",
-            "VALUE COUNTS\n",
-            "7.0     29310\n",
-            "4.0     23839\n",
-            "1.0      2627\n",
-            "10.0     1461\n",
-            "12.0      119\n",
-            "8.0         5\n",
-            "6.0         2\n",
-            "11.0        1\n",
-            "Name: buildingqualitytypeid, dtype: int64\n",
-            "\n",
-            "BUILDINGTYPEID HEAD\n",
-            "0    7.0\n",
-            "1    NaN\n",
-            "2    NaN\n",
-            "3    7.0\n",
-            "4    4.0\n",
-            "Name: buildingqualitytypeid, dtype: float64\n",
-            "\n",
-            "DF TRAIN HEAD\n",
-            "   parcelid  logerror  ac_id  ...  transaction_month  census_tractnumber  block_number\n",
-            "0  11827818    0.0402    NaN  ...                  3             5315.03          1013\n",
-            "1  12123024    0.0296    NaN  ...                  3             4625.00          1017\n",
-            "2  13867327    0.0344    NaN  ...                  3             0114.01          2017\n",
-            "3  12681894    0.0060    NaN  ...                  3             6513.02          1004\n",
-            "4  12848541    0.0695    1.0  ...                  3             4087.03          1018\n",
-            "\n",
-            "[5 rows x 45 columns]\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "79bB7JKdAEtX",
-        "colab_type": "code",
-        "outputId": "32b79160-fd19-4d39-988a-fc5fcd7c3284",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 225
-        }
-      },
-      "source": [
-        "# Mark missing building-quality ids with -1, then confirm that no nulls are left.\n",
-        "df_train = df_train.assign(buildingqualitytypeid=df_train.buildingqualitytypeid.fillna(-1))\n",
-        "print('NULL COUNT =', df_train.buildingqualitytypeid.isnull().sum())\n",
-        "print('VALUE COUNTS', df_train.buildingqualitytypeid.value_counts(), sep='\\n')"
-      ],
-      "execution_count": 193,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "NULL COUNT = 0\n",
-            "VALUE COUNTS\n",
-            "-1.0     32911\n",
-            " 7.0     29310\n",
-            " 4.0     23839\n",
-            " 1.0      2627\n",
-            " 10.0     1461\n",
-            " 12.0      119\n",
-            " 8.0         5\n",
-            " 6.0         2\n",
-            " 11.0        1\n",
-            "Name: buildingqualitytypeid, dtype: int64\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "DVgF1c_p_bN1",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: break-----\n",
-        "- break 1 of 2"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "mAB9bsrPAGzQ",
-        "colab_type": "code",
-        "outputId": "d847758e-212e-4de8-85c4-89b469b71c48",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 762
-        }
-      },
-      "source": [
-        "# say we run this whole thing by buildingqualitytypeid\n",
-        "# drop building types that aren't seen more than MIN_TYPE_COUNT times in the data\n",
-        "# (missing ids were filled with -1 in the previous cell, so they are counted as their own group)\n",
-        "# pandas equivalent:\n",
-        "# df_train = df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
-        "\n",
-        "# BACK TO cuDF\n",
-        "df_train = cudf.from_pandas(df_train)\n",
-        "\n",
-        "print(df_train.buildingqualitytypeid.value_counts())\n",
-        "print()\n",
-        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "print()\n",
-        "\n",
-        "# NOTE(review): import kept only in case later cells call sleep(); this cell no longer sleeps.\n",
-        "from time import sleep\n",
-        "MIN_TYPE_COUNT = 3\n",
-        "# Read the per-type row counts once from value_counts(). The saved output of the earlier\n",
-        "# per-type loop shows a ValueError raised by len(df_train.loc[mask]) while counting.\n",
-        "type_counts = df_train.buildingqualitytypeid.value_counts().to_pandas()\n",
-        "type_ids = list(type_counts.index)\n",
-        "safe = [tid for tid, t in type_counts.items() if t > MIN_TYPE_COUNT]\n",
-        "for tid, t in type_counts.items():\n",
-        "  if t > MIN_TYPE_COUNT:\n",
-        "    continue\n",
-        "  print(f'{tid} count too low @ {t}')\n",
-        "  df_train = df_train.loc[df_train.buildingqualitytypeid != tid]\n",
-        "\n",
-        "print()\n",
-        "print(df_train.buildingqualitytypeid.value_counts())\n",
-        "print()\n",
-        "\n",
-        "# Turn the -1 placeholder back into a real missing value now that rare types are gone.\n",
-        "df_train['buildingqualitytypeid'] = df_train['buildingqualitytypeid'].replace(-1,np.nan)\n",
-        "print(df_train.buildingqualitytypeid.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "\n",
-        "# BACK TO PANDAS\n",
-        "df_train = df_train.to_pandas()"
-      ],
-      "execution_count": 194,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "-1.0     32911\n",
-            " 7.0     29310\n",
-            " 4.0     23839\n",
-            " 1.0      2627\n",
-            " 10.0     1461\n",
-            " 12.0      119\n",
-            " 8.0         5\n",
-            " 6.0         2\n",
-            " 11.0        1\n",
-            "Name: buildingqualitytypeid, dtype: int32\n",
-            "\n",
-            "0\n",
-            "(90275, 45)\n",
-            "\n",
-            "1.0\n",
-            "4.0\n",
-            "6.0\n",
-            "6.0 count too low @ 2\n",
-            "7.0\n",
-            "8.0\n",
-            "10.0\n",
-            "11.0\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "error",
-          "ename": "ValueError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-194-5024fb6909aa>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     13\u001b[0m   \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m   \u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m   \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuildingqualitytypeid\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mtid\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0msafe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    107\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    108\u001b[0m                 \u001b[0marg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 109\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple_arg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    110\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    111\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_tuple_arg\u001b[0;34m(self, arg)\u001b[0m\n\u001b[1;32m    218\u001b[0m                 \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m                 \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mas_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    221\u001b[0m         \u001b[0;31m# Step 4: Downcast\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    222\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_downcast_to_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/site-packages/cudf/core/dataframe.py\u001b[0m in \u001b[0;36mindex\u001b[0;34m(self, _index)\u001b[0m\n\u001b[1;32m   1058\u001b[0m                 \u001b[0;34m\"have %d elements\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mold_length\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1059\u001b[0m             )\n\u001b[0;32m-> 1060\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1062\u001b[0m         \u001b[0;31m# try to build an index from generic _index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 1 elements, new values have 90275 elements"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Zl7eXGt_g1uU",
-        "colab_type": "text"
-      },
-      "source": [
-        "# -----current: break-----\n",
-        "- break 2 of 2\n",
-        "  - below is last cell run"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Q3ZBSOHm-79A",
-        "colab_type": "code",
-        "outputId": "e9ddb9b3-0bb0-4cf7-fa8e-ca35b9ea7f46",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 557
-        }
-      },
-      "source": [
-        "# run cell above (currently broken) as would be in pandas\n",
-        "not_df_train = df_train.to_pandas()\n",
-        "not_df_train = not_df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
-        "\n",
-        "missing_values = fillna_knn(not_df_train, \n",
-        "                            base = ['latitude', 'longitude'], \n",
-        "                            target = 'buildingqualitytypeid')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = not_df_train['buildingqualitytypeid'].isnull()\n",
-        "not_df_train.loc[missing_values_boolflag, 'buildingqualitytypeid'] = missing_values\n",
-        "\n",
-        "print(not_df_train.buildingqualitytypeid.isnull().sum())"
-      ],
-      "execution_count": 195,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "# of miss 0\n",
-            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
-            "                     metric_params=None, n_jobs=-1, n_neighbors=15, p=2,\n",
-            "                     weights='distance')\n",
-            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 15, 'weights': 'distance'}\n",
-            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    7.1s finished\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "error",
-          "ename": "ValueError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-195-5b5613488983>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m missing_values = fillna_knn(not_df_train, \n\u001b[1;32m      5\u001b[0m                             \u001b[0mbase\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'latitude'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'longitude'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m                             target = 'buildingqualitytypeid')\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m<ipython-input-189-96906960b52d>\u001b[0m in \u001b[0;36mfillna_knn\u001b[0;34m(df, base, target)\u001b[0m\n\u001b[1;32m     35\u001b[0m     \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m     \u001b[0mZ\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_values_boolflag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     38\u001b[0m     \u001b[0;31m#df.loc[ missing_values_boolflag, target ]  = Z\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     39\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/utils/metaestimators.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    115\u001b[0m         \u001b[0;31m# lambda, but not partial, allows help() to work with update_wrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 116\u001b[0;31m         \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    117\u001b[0m         \u001b[0;31m# update the docstring of the returned function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m         \u001b[0mupdate_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    455\u001b[0m         \"\"\"\n\u001b[1;32m    456\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_is_fitted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'predict'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    459\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mif_delegate_has_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdelegate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'best_estimator_'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'estimator'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/neighbors/classification.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m    145\u001b[0m             \u001b[0mClass\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mdata\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    146\u001b[0m         \"\"\"\n\u001b[0;32m--> 147\u001b[0;31m         \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'csr'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    149\u001b[0m         \u001b[0mneigh_dist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mneigh_ind\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkneighbors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    548\u001b[0m                              \u001b[0;34m\" minimum of %d is required%s.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    549\u001b[0m                              % (n_samples, array.shape, ensure_min_samples,\n\u001b[0;32m--> 550\u001b[0;31m                                 context))\n\u001b[0m\u001b[1;32m    551\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    552\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mensure_min_features\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mValueError\u001b[0m: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required."
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "bgXh5OATEacY",
-        "colab_type": "text"
-      },
-      "source": [
-        "# BELOW NOT (really) RUN\n",
-        "- if run, was in pandas"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "oTh_XPErqkHf",
-        "colab_type": "code",
-        "outputId": "3e667bca-70c5-4b66-c7d2-12d171cb140b",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        }
-      },
-      "source": [
-        "print(df_train.heating_system_id.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "temp['heating_system_id']=temp['heating_system_id'].fillna(-1)\n",
-        "temp=temp.groupby(\"heating_system_id\").filter(lambda x: x.heating_system_id.size > 3)\n",
-        "temp['heating_system_id'] = temp['heating_system_id'].replace(-1,np.nan)\n",
-        "print(temp.heating_system_id.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "\n",
-        "missing_values=fillna_knn(temp,\n",
-        "                  base = [ 'latitude', 'longitude' ] ,\n",
-        "                  target = 'heating_system_id')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['heating_system_id'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'heating_system_id' ]  = missing_values\n",
-        "\n",
-        "\n",
-        "print(df_train.heating_system_id.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "34194\n",
-            "(90272, 45)\n",
-            "34194\n",
-            "(90266, 45)\n",
-            "# of miss 34194\n",
-            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
-            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    3.3s finished\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
-            "                     metric_params=None, n_jobs=-1, n_neighbors=15, p=2,\n",
-            "                     weights='distance')\n",
-            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 15, 'weights': 'distance'}\n",
-            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
-            "predicted output shape (34194,)\n",
-            "0\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "oVjNSkUYqnCt",
-        "colab_type": "code",
-        "outputId": "80fc7e87-36cd-44b7-96e9-ef0631c7d10c",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        }
-      },
-      "source": [
-        "print(df_train.ac_id.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "temp['ac_id']=temp['ac_id'].fillna(-1)\n",
-        "temp=temp.groupby(\"ac_id\").filter(lambda x: x.ac_id.size > 3)\n",
-        "temp['ac_id'] = temp['ac_id'].replace(-1,np.nan)\n",
-        "print(temp.ac_id.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "\n",
-        "missing_values=fillna_knn(temp,\n",
-        "                  base = [ 'latitude', 'longitude' ] ,\n",
-        "                  target = 'ac_id')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['ac_id'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'ac_id' ]  = missing_values\n",
-        "\n",
-        "print(df_train.ac_id.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "61492\n",
-            "(90272, 45)\n",
-            "61492\n",
-            "(90270, 45)\n",
-            "# of miss 61492\n",
-            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
-            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    2.0s finished\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
-            "                     metric_params=None, n_jobs=-1, n_neighbors=25, p=2,\n",
-            "                     weights='distance')\n",
-            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 25, 'weights': 'distance'}\n",
-            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
-            "predicted output shape (61492,)\n",
-            "0\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "qTbcYbexqr0Y",
-        "colab_type": "code",
-        "outputId": "3459affa-a41a-4241-ab62-f0dfcadda039",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 278
-        }
-      },
-      "source": [
-        "#yearbuilt\n",
-        "print(df_train.yearbuilt.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "temp['yearbuilt']=temp['yearbuilt'].fillna(-1)\n",
-        "temp=temp.groupby(\"yearbuilt\").filter(lambda x: x.yearbuilt.size > 3)\n",
-        "temp['yearbuilt'] = temp['yearbuilt'].replace(-1,np.nan)\n",
-        "print(temp.yearbuilt.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "\n",
-        "missing_values=fillna_knn(temp,\n",
-        "                  base = [ 'latitude', 'longitude','buildingqualitytypeid','propertylandusetypeid' ] ,\n",
-        "                  target = 'yearbuilt')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['yearbuilt'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'yearbuilt' ]  = missing_values\n",
-        "print(df_train.yearbuilt.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "755\n",
-            "(90272, 45)\n",
-            "755\n",
-            "(90258, 45)\n",
-            "# of miss 755\n",
-            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
-            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:   44.3s finished\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "grid.best_estimator_ KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='euclidean',\n",
-            "                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,\n",
-            "                     weights='distance')\n",
-            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}\n",
-            "grid.scorer_ make_scorer(f1_score, pos_label=None, average=weighted)\n",
-            "predicted output shape (755,)\n",
-            "0\n"
-          ],
-          "name": "stdout"
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Gx1LYGmfqxLk",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#location seems to be related to building quality, (knnregressor)\n",
-        "from sklearn.model_selection import KFold\n",
-        "\n",
-        "def fillna_knnr( df, base, target):\n",
-        "    data_colnames = [ target ] + base\n",
-        "    #print(\"data_colnames\",data_colnames)\n",
-        "    missing_values_boolflag = df[target].isnull() #true for missing rows, false for columns with values\n",
-        "    #print(\"miss\",missing_values_boolflag.head())\n",
-        "    not_missing_boolflag = ~missing_values_boolflag \n",
-        "    #print(\"not miss\",not_missing_boolflag.head())\n",
-        "    number_of_missing_val = missing_values_boolflag.sum()\n",
-        "    print(\"# of miss\",number_of_missing_val)\n",
-        "    not_missing_rows = df.loc[ not_missing_boolflag, data_colnames]\n",
-        "    #print(not_missing_rows.head())\n",
-        "    Y = not_missing_rows[target]\n",
-        "    X = not_missing_rows[base]\n",
-        "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192)\n",
-        "    metrics       = ['euclidean'] \n",
-        "    weights       = ['distance'] \n",
-        "    numNeighbors  = [5,10,15,20,25]\n",
-        "    param_grid    = dict(metric=metrics,weights=weights,n_neighbors=numNeighbors)\n",
-        "    cv            = KFold(n_splits=3,random_state=3192,shuffle=False) \n",
-        "    grid = GridSearchCV(neighbors.KNeighborsRegressor(n_jobs=-1),param_grid=param_grid,cv=cv,scoring='neg_mean_absolute_error',refit=True,return_train_score=True,verbose=1,n_jobs=-1,pre_dispatch='n_jobs')\n",
-        "    grid.fit(X_train ,Y_train)\n",
-        "    #print(\"grid.cv_results_\",grid.cv_results_)\n",
-        "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
-        "    print(\"grid.best_params_\",grid.best_params_)\n",
-        "    print(\"grid.scorer_\",grid.scorer_)\n",
-        "    #print(\"grid.n_splits_\",grid.n_splits_)\n",
-        "    y_true, y_pred = Y_test, grid.predict(X_test) \n",
-        "    Z = grid.predict(df.loc[missing_values_boolflag, base])\n",
-        "    #df.loc[ missing_values_boolflag, target ]  = Z\n",
-        "    return Z"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "pj5PXm7ozg5l",
-        "colab_type": "code",
-        "outputId": "3d42279f-221c-444c-8795-05a0832f97cd",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 606
-        }
-      },
-      "source": [
-        "#garage_sqft\n",
-        "print(df_train.garage_sqft.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.loc[df_train.garagecarcnt>0,df_train.columns].copy()\n",
-        "\n",
-        "print(temp.garage_sqft.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = [ 'latitude', 'longitude','garagecarcnt'] ,\n",
-        "                  target = 'garage_sqft')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['garage_sqft'].isnull()\n",
-        "df_train.loc[missing_values_boolflag, 'garage_sqft'] = missing_values\n",
-        "print(df_train.garage_sqft.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "text": [
-            "69255\n",
-            "(90272, 45)\n",
-            "8920\n",
-            "(29647, 45)\n",
-            "# of miss 8920\n",
-            "Fitting 3 folds for each of 5 candidates, totalling 15 fits\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n",
-            "[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    2.7s finished\n"
-          ],
-          "name": "stderr"
-        },
-        {
-          "output_type": "stream",
-          "text": [
-            "grid.best_estimator_ KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='euclidean',\n",
-            "                    metric_params=None, n_jobs=-1, n_neighbors=5, p=2,\n",
-            "                    weights='distance')\n",
-            "grid.best_params_ {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}\n",
-            "grid.scorer_ make_scorer(mean_absolute_error, greater_is_better=False)\n",
-            "predicted output shape (8920,)\n"
-          ],
-          "name": "stdout"
-        },
-        {
-          "output_type": "error",
-          "ename": "ValueError",
-          "evalue": "ignored",
-          "traceback": [
-            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-            "\u001b[0;32m<ipython-input-55-bed8646c0f85>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"predicted output shape\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmissing_values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0mmissing_values_boolflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'garage_sqft'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmissing_values_boolflag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'garage_sqft'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmissing_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     15\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgarage_sqft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m    188\u001b[0m             \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    189\u001b[0m         \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_setitem_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 190\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    191\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    192\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[0;34m(self, indexer, value)\u001b[0m\n\u001b[1;32m    609\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    610\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m                         raise ValueError('Must have equal len keys and value '\n\u001b[0m\u001b[1;32m    612\u001b[0m                                          'when setting with an iterable')\n\u001b[1;32m    613\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-            "\u001b[0;31mValueError\u001b[0m: Must have equal len keys and value when setting with an iterable"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "b7e5CFTyzg_M",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "df_train = df_train.drop('parcelid', axis=1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "YxGquCOOzhD7",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#All the other columns with missing values seem to be integer-valued and will need regression-based imputation;\n",
-        "#time to get the categorical variables one-hot encoded\n",
-        "\n",
-        "#Identify categorical columns; all remaining columns are treated as numerical (used to produce a heatmap)\n",
-        "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips', 'heating_system_id','has_hottub_or_spa',\n",
-        "          'just_hottub_or_spa', 'pool_with_spa_tub_yes','pool_with_spa_tub_no','propertylandusetypeid','basement_flag'\n",
-        "          ,'fireplaceflag','taxdelinquencyflag']\n",
-        "numcols = [x for x in df_train.columns if x not in catcols]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "uVZkszJEzhHj",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#total_finished_living_area_sqft\n",
-        "\n",
-        "print(df_train.total_finished_living_area_sqft.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.total_finished_living_area_sqft.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = [ 'latitude', 'longitude','basementsqft','numberofstories','poolcnt','garagecarcnt','garage_sqft','propertylandusetypeid'] ,\n",
-        "                  target = 'total_finished_living_area_sqft')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['total_finished_living_area_sqft'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'total_finished_living_area_sqft' ] = missing_values\n",
-        "print(df_train.total_finished_living_area_sqft.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "CVrTMb92zhLX",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#total_bath\t1165\n",
-        "#full_bath\t1182\n",
-        "#half_bath\t1182\n",
-        "#roomcnt\t1416\n",
-        "#bedroomcnt\t1421\n",
-        "\n",
-        "#total_finished_living_area_sqft\n",
-        "\n",
-        "print(df_train.total_bath.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.total_bath.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['propertylandusetypeid','total_finished_living_area_sqft' ] ,\n",
-        "                  target = 'total_bath')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['total_bath'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'total_bath' ] = missing_values\n",
-        "print(df_train.total_bath.isnull().sum())#total_bath\t1165\n",
-        "#full_bath\t1182\n",
-        "#half_bath\t1182\n",
-        "#roomcnt\t1416\n",
-        "#bedroomcnt\t1421\n",
-        "\n",
-        "#total_finished_living_area_sqft\n",
-        "\n",
-        "print(df_train.total_bath.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.total_bath.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['propertylandusetypeid','total_finished_living_area_sqft' ] ,\n",
-        "                  target = 'total_bath')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['total_bath'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'total_bath' ] = missing_values\n",
-        "print(df_train.total_bath.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "BjIKlu-tzhPI",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# Drop half_bath and full_bath, as they are redundant with total_bath\n",
-        "df_train = df_train.drop(['full_bath','half_bath'], axis=1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "02X1y6EBzhT9",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#bedroomcnt\t1421\n",
-        "\n",
-        "print(df_train.bedroomcnt.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.bedroomcnt.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['propertylandusetypeid','total_finished_living_area_sqft','total_bath' ] ,\n",
-        "                  target = 'bedroomcnt')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['bedroomcnt'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'bedroomcnt' ] = missing_values\n",
-        "print(df_train.bedroomcnt.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "WzkZ_qeHzhXP",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "df_train['total_bath']=df_train.total_bath.round(1)\n",
-        "df_train['bedroomcnt']=df_train.bedroomcnt.round(1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "QF9DtDAczhaW",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#recalculate roomcnt\t1416 as we have used imputation for total_bath and bedroomcnt\n",
-        "\n",
-        "df_train.loc[(df_train.roomcnt.isnull()),['roomcnt']]=df_train.total_bath + df_train.bedroomcnt"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "U5N41TBlz60W",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "print(df_train.shape)\n",
-        "df_train =df_train.loc[(df_train.total_parcel_tax.notnull()) & (df_train.land_tax.notnull()),df_train.columns]\n",
-        "\n",
-        "print(df_train.shape)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "kv9h5yL3z64Q",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#lot_area_sqft\n",
-        "print(df_train.lot_area_sqft.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.lot_area_sqft.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['latitude','longitude','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
-        "                  target = 'lot_area_sqft')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['lot_area_sqft'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'lot_area_sqft' ] = missing_values.round(2)\n",
-        "print(df_train.lot_area_sqft.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "GYJLHrR4z68f",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# predict structure_tax and recalculate  total_parcel_tax = land_tax + structure_tax\n",
-        "\n",
-        "\n",
-        "print(df_train.structure_tax.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.structure_tax.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
-        "                  target = 'structure_tax')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['structure_tax'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'structure_tax' ] = missing_values.round(2)\n",
-        "print(df_train.structure_tax.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Ya-3K06Zz6_y",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#36 total_property_tax_2016 \n",
-        "\n",
-        "#total_parcel_tax = land_tax + structure_tax\n",
-        "    \n",
-        "df_train['total_parcel_tax']=df_train['structure_tax']+df_train['land_tax']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "8Fvr7voVz7DX",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#age of the property\n",
-        "df_train['age'] = 2016 - df_train['yearbuilt']\n",
-        "df_train=df_train.drop(['yearbuilt'],axis=1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "xl0EOIT-z7Gl",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#total_property_tax_2016\n",
-        "\n",
-        "\n",
-        "print(df_train.total_property_tax_2016.isnull().sum())\n",
-        "print(df_train.shape)\n",
-        "temp=df_train.copy()\n",
-        "print(temp.total_property_tax_2016.isnull().sum())\n",
-        "print(temp.shape)\n",
-        "missing_values=fillna_knnr(temp,\n",
-        "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
-        "                  target = 'total_property_tax_2016')\n",
-        "\n",
-        "print(\"predicted output shape\",missing_values.shape)\n",
-        "missing_values_boolflag = df_train['total_property_tax_2016'].isnull()\n",
-        "df_train.loc[ missing_values_boolflag, 'total_property_tax_2016' ] = missing_values.round(2)\n",
-        "print(df_train.total_property_tax_2016.isnull().sum())"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "YlaxWegqz7I-",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#check missing values\n",
-        "\n",
-        "missing_df = df_train.isnull().sum(axis=0).reset_index()\n",
-        "missing_df.columns = ['column_name', 'missing_count']\n",
-        "missing_df = missing_df.loc[missing_df['missing_count']>0]\n",
-        "missing_df = missing_df.sort_values(by='missing_count')\n",
-        "print(missing_df)\n",
-        "print(missing_df.shape)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "dIl_nqKVz7NQ",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#both of the columns listed above are missing 92% of their data and there is no related variable to impute them from, hence dropping them at this point\n",
-        "\n",
-        "df_train = df_train.drop(['finished_living_area_entryfloor_sqft2','finished_living_area_entryfloor_sqft1'], axis=1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "HQJd7rgKz7Qq",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#Identify categorical columns; all remaining columns are treated as numerical (used to produce a heatmap)\n",
-        "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips','pool_with_spa_tub_no','pool_with_spa_tub_yes','has_hottub_or_spa',\n",
-        "           'just_hottub_or_spa','heating_system_id','propertylandusetypeid','basement_flag','fireplaceflag','taxdelinquencyflag']\n",
-        "numcols = [x for x in df_train.columns if x not in catcols]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "VUN3a6uJz7Ut",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# 2 variables have object dtype; converting them to numeric\n",
-        "df_train[['census_tractnumber','block_number']] = df_train[['census_tractnumber','block_number']].apply(pd.to_numeric)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "zGx77rRAz7ZZ",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# dropping categorical columns as xgboost feature selection cannot handle them\n",
-        "\n",
-        "train_x = df_train.drop(catcols+['logerror'], axis=1)\n",
-        "\n",
-        "train_y=df_train['logerror']\n",
-        "\n",
-        "train_x = train_x.astype(float) \n",
-        "train_y = train_y.astype(float)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "es_Ew2YJz7dT",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "pd.options.display.max_rows = 65\n",
-        "\n",
-        "dtype_df = train_x.dtypes.reset_index()\n",
-        "dtype_df.columns = [\"Count\", \"Column Type\"]\n",
-        "#dtype_df"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "bvWIhR38z7fW",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "df_train.loc[df_train.has_hottub_or_spa==True,'has_hottub_or_spa']=\"Yes\"\n",
-        "df_train.loc[df_train.has_hottub_or_spa==0,'has_hottub_or_spa']=\"No\"\n",
-        "\n",
-        "df_train.loc[df_train.just_hottub_or_spa==0,'just_hottub_or_spa']=\"No\"\n",
-        "df_train.loc[df_train.just_hottub_or_spa==1,'just_hottub_or_spa']=\"Yes\"\n",
-        "\n",
-        "df_train.loc[df_train.deck_flag==0,'deck_flag']=\"No\"\n",
-        "df_train.loc[df_train.deck_flag==1,'deck_flag']=\"Yes\"\n",
-        "\n",
-        "df_train.loc[df_train.basement_flag==0,'basement_flag']=\"No\"\n",
-        "df_train.loc[df_train.basement_flag==1,'basement_flag']=\"Yes\"\n",
-        "\n",
-        "df_train.loc[df_train.fireplaceflag==False,'fireplaceflag']=\"No\"\n",
-        "df_train.loc[df_train.fireplaceflag==True,'fireplaceflag']=\"Yes\"\n",
-        "#"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Ef9JjrmMz7jw",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#ac_id,heating_system_id,propertylandusetypeid\n",
-        "dummieslist=['has_hottub_or_spa','just_hottub_or_spa',\n",
-        "             'deck_flag','fips','basement_flag','fireplaceflag','taxdelinquencyflag']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Z51Zrt2Uz7oD",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "df_train[dummieslist] = df_train[dummieslist].astype(object)\n",
-        "dummies = pd.get_dummies(df_train[dummieslist], prefix= dummieslist)\n",
-        "dummies.shape"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "VHBi5Gg6z7tu",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "dummies2=['pool_with_spa_tub_no','pool_with_spa_tub_yes']\n",
-        "df_train[dummies2] = df_train[dummies2].astype(int)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "oocTPKI9z7rk",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "import MySQLdb\n",
-        "from sqlalchemy import create_engine\n",
-        "engineString = 'mysql+mysqldb://root:MyNewPass@localhost/sakila'\n",
-        "engine = create_engine(engineString)\n",
-        "con=engine.connect()\n",
-        "\n",
-        "with engine.connect() as con, con.begin():\n",
-        "    df_train.to_sql('df_train_f1', engine, chunksize=10000, index =False,if_exists ='replace')"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "zj5ZLSPlz7XC",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "numcols2=['basementsqft','total_bath','bedroomcnt','total_finished_living_area_sqft','fireplace_count','garagecarcnt',\n",
-        " 'garage_sqft','latitude','longitude','lot_area_sqft','poolcnt','pool_sqft','roomcnt','unitcnt','patio_sqft','storage_sqft',\n",
-        " 'numberofstories','structure_tax','total_parcel_tax','land_tax','total_property_tax_2016','taxdelinquencyyear','transaction_month',\n",
-        " 'census_tractnumber','block_number','age']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "fp53dotszhgA",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "Y=df_train['logerror']"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "O0Uaei4rzhj6",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#buildingqualitytypeid ->has order\n",
-        "le = LabelEncoder()\n",
-        "df_train['buildingqualitytypeid']=le.fit_transform(df_train.buildingqualitytypeid)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "g4-g-uvtzhds",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#df_train.ac_id.value_counts()\n",
-        "#df_train.propertylandusetypeid.value_counts()\n",
-        "#'buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid'"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "SzliXafdzhRd",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "X=pd.concat([dummies,df_train[dummies2],df_train[numcols2],df_train[['buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid']]],axis=1)\n",
-        "X.shape"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "DBsZjyQd0W1N",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.10, random_state=3192)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ihXFZWcn0W5D",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#  top features\n",
-        "import xgboost as xgb\n",
-        "xgb_params = {\n",
-        "    'eta': 0.05,\n",
-        "    'max_depth': 8,\n",
-        "    'subsample': 0.7,\n",
-        "    'colsample_bytree': 0.7,\n",
-        "    'objective': 'reg:linear',\n",
-        "    'silent': 1,\n",
-        "    'seed' : 0\n",
-        "}\n",
-        "dtrain = xgb.DMatrix(X_train, Y_train, feature_names=X_train.columns.values)\n",
-        "model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=50)\n",
-        "# plot the important features #\n",
-        "fig, ax = plt.subplots(figsize=(12,18))\n",
-        "#max_num_features=50 is left out: passing it raised an unexplained error\n",
-        "xgb.plot_importance(model, height=0.8, ax=ax)\n",
-        "plt.show()"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "TQEEzNkX0W9w",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#top features\n",
-        "xgboost_selection=['total_finished_living_area_sqft','latitude','structure_tax','total_property_tax_2016',\n",
-        "'total_parcel_tax','land_tax','longitude','lot_area_sqft','census_tractnumber','age','total_bath','bedroomcnt',\n",
-        "'block_number','transaction_month','roomcnt','taxdelinquencyyear','unitcnt','taxdelinquencyflag_No',\n",
-        "'fips_LA','garage_sqft','pool_with_spa_tub_no','has_hottub_or_spa_No','garagecarcnt','deck_flag_No',\n",
-        "'poolcnt','pool_sqft'\n",
-        "]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "Rr_6EO4G0XEj",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# feature selection\n",
-        "#ac_id,heating_system_id,propertylandusetypeid\n",
-        "from sklearn.ensemble import ExtraTreesRegressor\n",
-        "from sklearn.feature_selection import SelectFromModel\n",
-        "reg = ExtraTreesRegressor(n_estimators=500, max_depth=8, max_features='sqrt',\n",
-        "                          min_samples_split=100 ,min_samples_leaf=10, bootstrap=True,n_jobs=-1, random_state=3192)\n",
-        "reg = reg.fit(X_train, Y_train)\n",
-        "#print(\"importance\",reg.feature_importances_) \n",
-        "model = SelectFromModel(reg, prefit=True)\n",
-        "X_new = model.transform(X_train)\n",
-        "print(X_train.shape)\n",
-        "print(X_new.shape)  \n",
-        "\n",
-        "feat_names = X.columns.values\n",
-        "importances = reg.feature_importances_\n",
-        "std = np.std([tree.feature_importances_ for tree in reg.estimators_], axis=0)\n",
-        "indices = np.argsort(importances)[::-1][:26]\n",
-        "plt.figure(figsize=(12,12))\n",
-        "plt.title(\"Feature importances\")\n",
-        "plt.bar(range(len(indices)), importances[indices], color=\"r\", yerr=std[indices], align=\"center\")\n",
-        "plt.xticks(range(len(indices)), feat_names[indices], rotation='vertical')\n",
-        "plt.xlim([-1, len(indices)])\n",
-        "plt.show()"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "i4FCNOG70XIU",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "tree_selection=[\n",
-        "    'total_finished_living_area_sqft','structure_tax','total_property_tax_2016','total_bath','total_parcel_tax',\n",
-        "    'age','latitude','census_tractnumber','bedroomcnt','longitude','land_tax','propertylandusetypeid','block_number',\n",
-        "    'buildingqualitytypeid','numberofstories','heating_system_id','unitcnt','transaction_month','lot_area_sqft','roomcnt',\n",
-        "    'garage_sqft','garagecarcnt','pool_with_spa_tub_no','poolcnt','fips_LA','taxdelinquencyyear','patio_sqft',\n",
-        "    'taxdelinquencyflag_No','taxdelinquencyflag_Yes'\n",
-        "]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "TmIS1WAS0XMW",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "import matplotlib.pyplot as plt\n",
-        "from sklearn.model_selection import KFold\n",
-        "from sklearn.linear_model import Ridge,Lasso\n",
-        "from sklearn.feature_selection import RFECV\n",
-        "from sklearn.linear_model import LinearRegression\n",
-        "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer\n",
-        "\n",
-        "#model=Lasso(alpha=0.2, fit_intercept=True, normalize=True, precompute=False, copy_X=True,\n",
-        " #                                max_iter=1000, \n",
-        "  #                               tol=0.0001, warm_start=False, positive=False, random_state=3192, selection='cyclic')\n",
-        "\n",
-        "#Ridge(random_state=3192,solver='auto',fit_intercept=True,normalize=True,alpha=0.1)\n",
-        "#LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True)\n",
-        "\n",
-        "\n",
-        "rfecv = RFECV(estimator=LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True), step=2, cv=KFold(4),scoring='neg_mean_absolute_error')\n",
-        "rfecv.fit(X_train, Y_train)\n",
-        "\n",
-        "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n",
-        "\n",
-        "# Plot number of features VS. cross-validation scores\n",
-        "plt.figure()\n",
-        "plt.xlabel(\"Number of features selected\")\n",
-        "\n",
-        "plt.ylabel(\"Cross validation score (nb of correct classifications)\")\n",
-        "plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)\n",
-        "plt.show()\n"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "DIw8O00U0XPR",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "rfe_selection = [i for indx,i in enumerate(X.columns) if rfecv.support_[indx] == True]"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "gHA0x5_80XWy",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#Linear regression with rfe_selection selection\n",
-        "#rfe_selection, tree_selection, xgboost_selection\n",
-        "from sklearn.linear_model import LinearRegression\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer,mean_squared_error\n",
-        "\n",
-        "# just to check whether normalized /not normalized data gives better results\n",
-        "parameters = {'fit_intercept':[True], 'normalize':[True,False], 'copy_X':[True]}\n",
-        "scoring = {'MAE':'neg_mean_absolute_error','MSE': make_scorer(mean_squared_error,greater_is_better=False)}\n",
-        "\n",
-        "grid1 = GridSearchCV(LinearRegression(n_jobs=-1),param_grid=parameters, scoring=scoring,cv=5,refit='MAE',\n",
-        "                    return_train_score=True,\n",
-        "                    verbose=0,n_jobs=-1,pre_dispatch='n_jobs')\n",
-        "\n",
-        "grid1.fit(X_train[rfe_selection], Y_train)\n",
-        "#print(\"5. grid best_score_\",abs(grid.best_score_))\n",
-        "Y_pred = grid1.predict(X_test[rfe_selection])\n",
-        "print(\"MAE on test data\",mean_absolute_error(Y_test,Y_pred))\n",
-        "print(\"MSE on test data\",mean_squared_error(Y_test,Y_pred))\n",
-        "print(\"R Squared data \",r2_score(Y_test,Y_pred))"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "ekn4pBs60XcT",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#pca selection\n",
-        "from sklearn.decomposition import PCA\n",
-        "from sklearn.preprocessing import scale\n",
-        "import matplotlib.pyplot as plt\n",
-        "from sklearn.preprocessing import scale\n",
-        "%matplotlib inline\n",
-        "scaled_x = scale(X)\n",
-        "pca = PCA(n_components=None, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
-        "pca.fit(scaled_x)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "yFuT-wUN0XfV",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "# The amount of variance that each PC explains\n",
-        "var= pca.explained_variance_ratio_\n",
-        "#Cumulative variance explained, in percent\n",
-        "var1=np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)\n",
-        "print(var1)\n",
-        "plt.plot(var1)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "iPN4OBUe0XlD",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "#Looking at above plot I'm taking 28 variables\n",
-        "\n",
-        "pca = PCA(n_components=28, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
-        "pca.fit(scaled_x)\n",
-        "\n",
-        "pca1=pca.fit_transform(scaled_x)\n",
-        "\n",
-        "pca = PCA(n_components=28, copy=True, whiten=True, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
-        "pca.fit(scaled_x)\n",
-        "pca2=pca.fit_transform(scaled_x)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "EE4ednPC0XjX",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "pcaX_train, pcaX_test, pcaY_train, pcaY_test = train_test_split(pca1, Y, test_size=0.10, random_state=3192)\n",
-        "pca2X_train, pca2X_test, pca2Y_train, pca2Y_test = train_test_split(pca2, Y, test_size=0.10, random_state=3192)"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "erYMXvTG0XaK",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        "from sklearn.ensemble import GradientBoostingRegressor\n",
-        "from sklearn.metrics import mean_absolute_error,make_scorer\n",
-        "from sklearn.model_selection import GridSearchCV\n",
-        "\n",
-        "# just to check whether normalized /not normalized data gives better results\n",
-        "\n",
-        " # 0.005 for 1200 trees.\n",
-        "param_grid={'n_estimators':[1200],'max_features':[22]}\n",
-        "\n",
-        "              \n",
-        "grid13 = GridSearchCV(GradientBoostingRegressor(subsample=0.8,min_samples_leaf=50,min_samples_split=50,max_depth=9,loss='ls',criterion='friedman_mse',learning_rate=0.005,random_state=3192),\n",
-        "                     param_grid=param_grid, cv=5,refit='MAE',\n",
-        "                    return_train_score=True,\n",
-        "                    verbose=2,n_jobs=-1,pre_dispatch='n_jobs')\n",
-        "\n",
-        "grid13.fit(pcaX_train, pcaY_train)\n",
-        "print(\"5. grid best_score_\",abs(grid13.best_score_))\n",
-        "print(\"best params\",grid13.best_params_)\n",
-        "print(\"best score\",grid13.best_score_)\n",
-        "Y_pred = grid13.predict(pcaX_test)\n",
-        "print(\"MAE on test data\",mean_absolute_error(pcaY_test,Y_pred))\n",
-        "print(\"MSE on test data\",mean_squared_error(pcaY_test,Y_pred))"
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "BgtbLCcR0XUx",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "FjdSCEFP0XCM",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": 0,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "WzATgLxmam5w",
-        "colab_type": "text"
-      },
-      "source": [
-        "In this competition, Zillow is asking you to predict the log-error between their Zestimate and the actual sale price, given all the features of a home. The log error is defined as\n",
-        "\n",
-        "logerror=log(Zestimate)−log(SalePrice)\n",
-        "and it is recorded in the transactions file train.csv. In this competition, you are going to predict the logerror for the months in Fall 2017. Since all the real estate transactions in the U.S. are publicly available, we will close the competition (no longer accepting submissions) before the evaluation period begins.\n",
-        "\n",
-        "Train/Test split\n",
-        "You are provided with a full list of real estate properties in three counties (Los Angeles, Orange and Ventura, California) data in 2016.\n",
-        "The train data has all the transactions before October 15, 2016, plus some of the transactions after October 15, 2016.\n",
-        "The test data in the public leaderboard has the rest of the transactions between October 15 and December 31, 2016.\n",
-        "The rest of the test data, which is used for calculating the private leaderboard, is all the properties in October 15, 2017, to December 15, 2017. This period is called the \"sales tracking period\", during which we will not be taking any submissions.\n",
-        "You are asked to predict 6 time points for all properties: October 2016 (201610), November 2016 (201611), December 2016 (201612), October 2017 (201710), November 2017 (201711), and December 2017 (201712).\n",
-        "Not all the properties are sold in each time period. If a property was not sold in a certain time period, that particular row will be ignored when calculating your score.\n",
-        "If a property is sold multiple times within 31 days, we take the first reasonable value as the ground truth. By \"reasonable\", we mean if the data seems wrong, we will take the transaction that has a value that makes more sense.\n",
-        "File descriptions\n",
-        "properties_2016.csv - all the properties with their home features for 2016. Note: Some 2017 new properties don't have any data yet except for their parcelid's. Those data points should be populated when properties_2017.csv is available.\n",
-        "properties_2017.csv - all the properties with their home features for 2017 (released on 10/2/2017)\n",
-        "train_2016.csv - the training set with transactions from 1/1/2016 to 12/31/2016\n",
-        "train_2017.csv - the training set with transactions from 1/1/2017 to 9/15/2017 (released on 10/2/2017)\n",
-        "sample_submission.csv - a sample submission file in the correct format"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "R0yrYUf7anN0",
-        "colab_type": "code",
-        "colab": {}
-      },
-      "source": [
-        ""
-      ],
-      "execution_count": 0,
-      "outputs": []
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "kkEdr1VmigyU"
+   },
+   "source": [
+    "### Install RAPIDS AI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "p129YxxnihcV"
+   },
+   "outputs": [],
+   "source": [
+    "!wget -nc https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh\n",
+    "# RAPIDS 0.10 nightly\n",
+    "!bash rapids-colab.sh \n",
+    "\n",
+    "import sys, os\n",
+    "\n",
+    "sys.path.append('/usr/local/lib/python3.6/site-packages/')\n",
+    "os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'\n",
+    "os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "1CsdVW7SU9Li"
+   },
+   "source": [
+    "# Zillow Kaggle Competition on RAPIDS AI\n",
+    "- initially based off eswar3's [Zillow prediction models](https://github.com/eswar3/Zillow-prediction-models) repo\n",
+    "## Download Data\n",
+    "- to download the data, please plug in your kaggle api username & key\n",
+    "  - you can set up your kaggle api at `https://www.kaggle.com/YOUR USERNAME HERE/account`\n",
+    "  - learn more: https://github.com/Kaggle/kaggle-api#api-credentials"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "x1dLRTm168Tk"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install kaggle\n",
+    "!mkdir /root/.kaggle\n",
+    "\n",
+    "# plug api -- get your own API key\n",
+    "!echo '{\"username\":\"warobson\",\"key\":\"\"}' > /root/.kaggle/kaggle.json\n",
+    "!chmod 600 /root/.kaggle/kaggle.json\n",
+    "\n",
+    "# !kaggle datasets download\n",
+    "!kaggle competitions download -c zillow-prize-1\n",
+    "\n",
+    "# unzip kaggle data\n",
+    "!unzip -q \"/content/sample_submission.csv.zip\"\n",
+    "!unzip -q \"/content/train_2016_v2.csv.zip\"\n",
+    "!unzip -q \"/content/properties_2016.csv.zip\"\n",
+    "!unzip -q \"/content/train_2017.csv.zip\"\n",
+    "!unzip -q \"/content/properties_2017.csv.zip\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "LICr9uz8do9K"
+   },
+   "source": [
+    "#### How is the data saved?\n",
+    "- inside the `/content/` directory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 173
+    },
+    "colab_type": "code",
+    "id": "6n75DyJ-dm4B",
+    "outputId": "64ac687e-39d6-4bb1-f4b7-5476c9de3b84"
+   },
+   "outputs": [],
+   "source": [
+    "# display content folder contents\n",
+    "!ls \"/content/\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Lpa1b4edIXuT"
+   },
+   "source": [
+    "# Imports\n",
+    "### RAPIDS\n",
+    "* `cuDF`\n",
+    "  - words here\n",
+    "* `cuML`\n",
+    "  - words here\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ZKN5zuROroJD"
+   },
+   "outputs": [],
+   "source": [
+    "# rapids \n",
+    "import cudf, cuml \n",
+    "# switch to cupy next update (once docker has it)\n",
+    "import numpy as np\n",
+    "# general \n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "YJeywzd2efw7"
+   },
+   "source": [
+    "## Data\n",
+    "* `properties_2016`\n",
+    "  - approx. 27,000,000 residential properties \n",
+    "  - 58 attributes each\n",
+    "* `train_2016_v2`\n",
+    "  - 90,000 transaction records for closings in the year 2016\n",
+    "    * Merge datasets on `parcelid`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 156
+    },
+    "colab_type": "code",
+    "id": "2EfApIzCfEtr",
+    "outputId": "bc1e37d1-9ab8-4561-fa39-5af420480a72"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>parcelid</th>\n",
+       "      <th>airconditioningtypeid</th>\n",
+       "      <th>architecturalstyletypeid</th>\n",
+       "      <th>basementsqft</th>\n",
+       "      <th>bathroomcnt</th>\n",
+       "      <th>bedroomcnt</th>\n",
+       "      <th>buildingclasstypeid</th>\n",
+       "      <th>buildingqualitytypeid</th>\n",
+       "      <th>calculatedbathnbr</th>\n",
+       "      <th>decktypeid</th>\n",
+       "      <th>...</th>\n",
+       "      <th>numberofstories</th>\n",
+       "      <th>fireplaceflag</th>\n",
+       "      <th>structuretaxvaluedollarcnt</th>\n",
+       "      <th>taxvaluedollarcnt</th>\n",
+       "      <th>assessmentyear</th>\n",
+       "      <th>landtaxvaluedollarcnt</th>\n",
+       "      <th>taxamount</th>\n",
+       "      <th>taxdelinquencyflag</th>\n",
+       "      <th>taxdelinquencyyear</th>\n",
+       "      <th>censustractandblock</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>10754147</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>10759547</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>27516.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>27516.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>10843547</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>650756.0</td>\n",
+       "      <td>1413387.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>762631.0</td>\n",
+       "      <td>20800.37</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>10859147</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>571346.0</td>\n",
+       "      <td>1156834.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>585488.0</td>\n",
+       "      <td>14557.57</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>10879947</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>193796.0</td>\n",
+       "      <td>433491.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>239695.0</td>\n",
+       "      <td>5725.17</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 58 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   parcelid airconditioningtypeid architecturalstyletypeid basementsqft  \\\n",
+       "0  10754147                  null                     null         null   \n",
+       "1  10759547                  null                     null         null   \n",
+       "2  10843547                  null                     null         null   \n",
+       "3  10859147                  null                     null         null   \n",
+       "4  10879947                  null                     null         null   \n",
+       "\n",
+       "   bathroomcnt  bedroomcnt buildingclasstypeid buildingqualitytypeid  \\\n",
+       "0          0.0         0.0                null                  null   \n",
+       "1          0.0         0.0                null                  null   \n",
+       "2          0.0         0.0                null                  null   \n",
+       "3          0.0         0.0                 3.0                   7.0   \n",
+       "4          0.0         0.0                 4.0                  null   \n",
+       "\n",
+       "  calculatedbathnbr decktypeid  ... numberofstories fireplaceflag  \\\n",
+       "0              null       null  ...            null          null   \n",
+       "1              null       null  ...            null          null   \n",
+       "2              null       null  ...            null          null   \n",
+       "3              null       null  ...             1.0          null   \n",
+       "4              null       null  ...            null          null   \n",
+       "\n",
+       "  structuretaxvaluedollarcnt taxvaluedollarcnt assessmentyear  \\\n",
+       "0                       null               9.0         2015.0   \n",
+       "1                       null           27516.0         2015.0   \n",
+       "2                   650756.0         1413387.0         2015.0   \n",
+       "3                   571346.0         1156834.0         2015.0   \n",
+       "4                   193796.0          433491.0         2015.0   \n",
+       "\n",
+       "  landtaxvaluedollarcnt taxamount  taxdelinquencyflag taxdelinquencyyear  \\\n",
+       "0                   9.0      null                None               null   \n",
+       "1               27516.0      null                None               null   \n",
+       "2              762631.0  20800.37                None               null   \n",
+       "3              585488.0  14557.57                None               null   \n",
+       "4              239695.0   5725.17                None               null   \n",
+       "\n",
+       "  censustractandblock  \n",
+       "0                null  \n",
+       "1                null  \n",
+       "2                null  \n",
+       "3                null  \n",
+       "4                null  \n",
+       "\n",
+       "[5 rows x 58 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# import 2016 properties\n",
+    "prop2016 = cudf.read_csv('zillow/properties_2016.csv')\n",
+    "\n",
+    "# peek display 2016 properties\n",
+    "prop2016.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 121
+    },
+    "colab_type": "code",
+    "id": "uynoUxpx8Xsn",
+    "outputId": "b64b7b32-c1f9-4cf3-c50d-36e90dc51a64"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>parcelid</th>\n",
+       "      <th>logerror</th>\n",
+       "      <th>transactiondate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>11016594</td>\n",
+       "      <td>0.0276</td>\n",
+       "      <td>2016-01-01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>14366692</td>\n",
+       "      <td>-0.1684</td>\n",
+       "      <td>2016-01-01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>12098116</td>\n",
+       "      <td>-0.0040</td>\n",
+       "      <td>2016-01-01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>12643413</td>\n",
+       "      <td>0.0218</td>\n",
+       "      <td>2016-01-02</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>14432541</td>\n",
+       "      <td>-0.0050</td>\n",
+       "      <td>2016-01-02</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   parcelid  logerror transactiondate\n",
+       "0  11016594    0.0276      2016-01-01\n",
+       "1  14366692   -0.1684      2016-01-01\n",
+       "2  12098116   -0.0040      2016-01-01\n",
+       "3  12643413    0.0218      2016-01-02\n",
+       "4  14432541   -0.0050      2016-01-02"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# import train 2016  data\n",
+    "train2016 = cudf.read_csv('zillow/train_2016_v2.csv',\n",
+    "                          parse_dates=[\"transactiondate\"])\n",
+    "\n",
+    "# peek display 2016 train\n",
+    "train2016.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "gGiscxESJDrl"
+   },
+   "source": [
+    "## [Zillow Prediction Model](https://colab.research.google.com/github/eswar3/Zillow-prediction-models/blob/master/Step%202a-Approach1.ipynb)\n",
+    "\n",
+    "    In this approach the properties data and transaction data are merged together before addressing any missing values\n",
+    "\n",
+    "\n",
+    "#### Merging Data \n",
+    " - we will start by merging the two dataframes\n",
+    "  - then rename the new dataframe's attributes to be meaningful \n",
+    "    - e.g. from `pooltypeid7` to `pool_with_spa_tub_no` and `structuretaxvaluedollarcnt` to `structure_tax`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 156
+    },
+    "colab_type": "code",
+    "id": "o4CvSIcwm4B2",
+    "outputId": "4e59a51a-ebd6-4fe5-b037-3165e57e3b85"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>parcelid</th>\n",
+       "      <th>logerror</th>\n",
+       "      <th>transactiondate</th>\n",
+       "      <th>ac_id</th>\n",
+       "      <th>architecturalstyletypeid</th>\n",
+       "      <th>basement_sqft</th>\n",
+       "      <th>total_bath</th>\n",
+       "      <th>bedroomcnt</th>\n",
+       "      <th>buildingclasstypeid</th>\n",
+       "      <th>buildingqualitytypeid</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fireplaceflag</th>\n",
+       "      <th>structure_tax</th>\n",
+       "      <th>total_parcel_tax</th>\n",
+       "      <th>assessmentyear</th>\n",
+       "      <th>land_tax</th>\n",
+       "      <th>total_property_tax_2016</th>\n",
+       "      <th>taxdelinquencyflag</th>\n",
+       "      <th>taxdelinquencyyear</th>\n",
+       "      <th>censustractandblock</th>\n",
+       "      <th>transaction_month</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>17129971</td>\n",
+       "      <td>0.0421</td>\n",
+       "      <td>2016-01-25</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>266718.0</td>\n",
+       "      <td>444528.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>177810.0</td>\n",
+       "      <td>5108.38</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>6.111005e+13</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>12921949</td>\n",
+       "      <td>0.0266</td>\n",
+       "      <td>2016-01-25</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>361522.0</td>\n",
+       "      <td>506127.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>144605.0</td>\n",
+       "      <td>6150.23</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>6.037404e+13</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>14502581</td>\n",
+       "      <td>-0.0060</td>\n",
+       "      <td>2016-01-25</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>170960.0</td>\n",
+       "      <td>339273.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>168313.0</td>\n",
+       "      <td>5487.92</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>6.059032e+13</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>10946127</td>\n",
+       "      <td>-0.1020</td>\n",
+       "      <td>2016-01-25</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>144440.0</td>\n",
+       "      <td>389200.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>244760.0</td>\n",
+       "      <td>4326.54</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>6.037311e+13</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>11835451</td>\n",
+       "      <td>-0.0030</td>\n",
+       "      <td>2016-01-25</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>null</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>null</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>null</td>\n",
+       "      <td>144020.0</td>\n",
+       "      <td>235739.0</td>\n",
+       "      <td>2015.0</td>\n",
+       "      <td>91719.0</td>\n",
+       "      <td>3698.87</td>\n",
+       "      <td>None</td>\n",
+       "      <td>null</td>\n",
+       "      <td>6.037530e+13</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 61 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   parcelid  logerror transactiondate ac_id architecturalstyletypeid  \\\n",
+       "0  17129971    0.0421      2016-01-25  null                     null   \n",
+       "1  12921949    0.0266      2016-01-25   1.0                     null   \n",
+       "2  14502581   -0.0060      2016-01-25  null                     null   \n",
+       "3  10946127   -0.1020      2016-01-25   1.0                     null   \n",
+       "4  11835451   -0.0030      2016-01-25  null                     null   \n",
+       "\n",
+       "  basement_sqft  total_bath  bedroomcnt buildingclasstypeid  \\\n",
+       "0          null         3.0         4.0                null   \n",
+       "1          null         3.0         4.0                null   \n",
+       "2          null         2.5         3.0                null   \n",
+       "3          null         3.0         2.0                null   \n",
+       "4          null         3.0         5.0                null   \n",
+       "\n",
+       "  buildingqualitytypeid  ...  fireplaceflag structure_tax total_parcel_tax  \\\n",
+       "0                  null  ...           null      266718.0         444528.0   \n",
+       "1                   4.0  ...           null      361522.0         506127.0   \n",
+       "2                  null  ...           null      170960.0         339273.0   \n",
+       "3                   4.0  ...           null      144440.0         389200.0   \n",
+       "4                   7.0  ...           null      144020.0         235739.0   \n",
+       "\n",
+       "   assessmentyear  land_tax total_property_tax_2016 taxdelinquencyflag  \\\n",
+       "0          2015.0  177810.0                 5108.38               None   \n",
+       "1          2015.0  144605.0                 6150.23               None   \n",
+       "2          2015.0  168313.0                 5487.92               None   \n",
+       "3          2015.0  244760.0                 4326.54               None   \n",
+       "4          2015.0   91719.0                 3698.87               None   \n",
+       "\n",
+       "  taxdelinquencyyear censustractandblock  transaction_month  \n",
+       "0               null        6.111005e+13                  1  \n",
+       "1               null        6.037404e+13                  1  \n",
+       "2               null        6.059032e+13                  1  \n",
+       "3               null        6.037311e+13                  1  \n",
+       "4               null        6.037530e+13                  1  \n",
+       "\n",
+       "[5 rows x 61 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# merge 2016 train and property dataframes by parcel id\n",
+    "df_train=''\n",
+    "df_train = train2016.merge(prop2016, how='left', on='parcelid')\n",
+    "\n",
+    "# add column inidcaticating month of transaction\n",
+    "df_train['transaction_month'] = df_train['transactiondate'].dt.month\n",
+    "\n",
+    "# set colums to be renamed for general english understandability \n",
+    "rename_these = {\"bathroomcnt\": \"total_bath\",\n",
+    "                \"fullbathcnt\": \"full_bath\",\n",
+    "                \"threequarterbathnbr\": \"half_bath\",\n",
+    "                \"yardbuildingsqft17\": \"patio_sqft\",\n",
+    "                \"yardbuildingsqft26\":\"storage_sqft\",\n",
+    "                \"decktypeid\": \"deck_flag\",\n",
+    "                \"pooltypeid7\": \"pool_with_spa_tub_no\", \n",
+    "                \"pooltypeid2\": \"pool_with_spa_tub_yes\",\n",
+    "                \"hashottuborspa\": \"has_hottub_or_spa\", \n",
+    "                \"pooltypeid10\": \"just_hottub_or_spa\",\n",
+    "                \"calculatedfinishedsquarefeet\":\"total_finished_living_area_sqft\", \n",
+    "                \"finishedsquarefeet12\": \"finished_living_area_sqft\",\n",
+    "                \"lotsizesquarefeet\": \"lot_area_sqft\",\n",
+    "                \"finishedsquarefeet50\":\"finished_living_area_entryfloor_sqft1\",\n",
+    "                \"finishedfloor1squarefeet\":\"finished_living_area_entryfloor_sqft2\",\n",
+    "                \"finishedsquarefeet6\": \"base_unfinished_and_finished_area_sqft\",\n",
+    "                \"finishedsquarefeet15\": \"total_area_sqft\",\n",
+    "                \"finishedsquarefeet13\": \"preimeter_living_area_sqft\",\n",
+    "                \"taxvaluedollarcnt\":\"total_parcel_tax\",\n",
+    "                \"landtaxvaluedollarcnt\":\"land_tax\",\n",
+    "                \"taxamount\":\"total_property_tax_2016\",\n",
+    "                \"structuretaxvaluedollarcnt\":\"structure_tax\",\n",
+    "                \"garagetotalsqft\":\"garage_sqft\",\n",
+    "                \"fireplacecnt\":\"fireplace_count\",\n",
+    "                \"buildingqualitytypeid \":\"building_quality_id\",\n",
+    "                \"heatingorsystemtypeid\":\"heating_system_id\",\n",
+    "                \"airconditioningtypeid\":\"ac_id\",\n",
+    "                \"storytypeid\": \"basement_flag\",\n",
+    "                \"basementsqft\": \"basement_sqft\",\n",
+    "                \"poolsizesum\": \"pool_sqft\",\n",
+    "                \"poolcnt\": \"pool_count\"}\n",
+    "# rename columns \n",
+    "df_train = df_train.rename(columns = rename_these)\n",
+    "\n",
+    "# what's the data frame look like?\n",
+    "df_train.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "YdtyBI2jFnJv"
+   },
+   "source": [
+    "## Conforming Attribute Values\n",
+    "### #0 boolean columns & null = 0s cases \n",
+    "* `pool_count`, `pool_with_spa_tub_no` and `pool_with_spa_tub_yes` are all binary variables, replace all NULL values with zero\n",
+    "*   `basement_flag` has values 7 & `Null` but is supposed to be bool, convert the `7`s to `1`s and the `Null`s to `0`s \n",
+    "* patio and shed variables with null values are assumed to have none\n",
+    "* deck_flag has only 2 values, `66` and `null`\n",
+    "  - convert it into binary flag\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "z3bPdNONHTYI"
+   },
+   "outputs": [],
+   "source": [
+    "# columns in which a missing value is read as \"none\", so null becomes 0\n",
+    "the_bool_club = [\n",
+    "    'pool_count',\n",
+    "    'pool_with_spa_tub_no',\n",
+    "    'pool_with_spa_tub_yes',\n",
+    "    'basement_flag',\n",
+    "    'patio_sqft',\n",
+    "    'storage_sqft',\n",
+    "    'deck_flag',\n",
+    "]\n",
+    "for col in the_bool_club:\n",
+    "    df_train[col] = df_train[col].fillna(0)\n",
+    "\n",
+    "# collapse the remaining codes (7 for basement, 66 for deck) to a plain 1\n",
+    "for flag_col, code in (('basement_flag', 7), ('deck_flag', 66)):\n",
+    "    df_train[flag_col] = df_train[flag_col].replace(code, 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "5MbGy6r7JLLD"
+   },
+   "source": [
+    "### #1 The pool\n",
+    "*   When pool is present and if it has tub/spa then `just_hottub_or_spa` = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 156
+    },
+    "colab_type": "code",
+    "id": "B3-1V93smA9A",
+    "outputId": "52e1a5d7-869a-443f-ac2d-40504992dc14"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "before\n",
+      "1.0    1161\n",
+      "Name: just_hottub_or_spa, dtype: int32\n",
+      "\n",
+      "after\n",
+      "0.0    1204\n",
+      "1.0    1161\n",
+      "Name: just_hottub_or_spa, dtype: int32\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f'before\\n{df_train.just_hottub_or_spa.value_counts()}\\n')\n",
+    "\n",
+    "# a pool that comes with a tub/spa is not \"just\" a tub/spa, so the\n",
+    "# still-missing just_hottub_or_spa entries on such rows become 0\n",
+    "has_pool = df_train['pool_count'] == 1\n",
+    "has_tub = df_train['has_hottub_or_spa'] == 1\n",
+    "tub_only_missing = df_train['just_hottub_or_spa'].isna()\n",
+    "conditions = has_pool & has_tub & tub_only_missing\n",
+    "df_train['just_hottub_or_spa'].loc[conditions] = 0\n",
+    "\n",
+    "print(f'after\\n{df_train.just_hottub_or_spa.value_counts()}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "v6E3-_XlSGBs"
+   },
+   "source": [
+    "\n",
+    "- when `has_hottub_or_spa` is null and `just_hottub_or_spa` is null\n",
+    "  - both should be zero\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Xa12WFccSGM6"
+   },
+   "outputs": [],
+   "source": [
+    "# has_hottub_or_spa is only written in step 2, so its null mask is taken once\n",
+    "tub_flag_missing = df_train['has_hottub_or_spa'].isna()\n",
+    "\n",
+    "# step 1: neither flag is recorded -> just_hottub_or_spa = 0\n",
+    "conditions = tub_flag_missing & df_train['just_hottub_or_spa'].isna()\n",
+    "df_train['just_hottub_or_spa'].loc[conditions] = 0\n",
+    "\n",
+    "# step 2: has flag still missing while the just flag is 0 -> has_hottub_or_spa = 0\n",
+    "conditions = tub_flag_missing & (df_train['just_hottub_or_spa'] == 0)\n",
+    "df_train['has_hottub_or_spa'].loc[conditions] = 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "5umCCWN73qxw"
+   },
+   "source": [
+    "- when there is no pool\n",
+    "  - if there is tub/spa \n",
+    "    - then `just_hottub_or_spa`  = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 69
+    },
+    "colab_type": "code",
+    "id": "FBgs7zJm3qk-",
+    "outputId": "78c76ac5-2b7f-4f98-9615-8a335bc3214e"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0    89114\n",
+       "1.0     1161\n",
+       "Name: just_hottub_or_spa, dtype: int32"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# a tub/spa on a property without a pool is \"just\" a tub/spa\n",
+    "no_pool = df_train['pool_count'] == 0\n",
+    "has_tub = df_train['has_hottub_or_spa'] == 1\n",
+    "conditions = no_pool & has_tub\n",
+    "df_train['just_hottub_or_spa'].loc[conditions] = 1\n",
+    "\n",
+    "# resulting distribution of the flag\n",
+    "df_train['just_hottub_or_spa'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "3LsRr1aoSCVx"
+   },
+   "source": [
+    "*   When there is no pool, set pool size to zero instead of na"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "NtdyXCbx0TKx"
+   },
+   "outputs": [],
+   "source": [
+    "# no pool means a pool area of 0 rather than an unknown area\n",
+    "conditions = (df_train['pool_count'] == 0)\n",
+    "df_train['pool_sqft'].loc[conditions] = 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "3hQFkXmAgQPY"
+   },
+   "source": [
+    "### #2 The basement\n",
+    "*    Where `basement_flag` is zero, `basement_sqft` should also be zero\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "kMuCOqAmLTmY"
+   },
+   "outputs": [],
+   "source": [
+    "# no basement means a basement area of 0 (same rule as for the pool)\n",
+    "conditions = (df_train['basement_flag'] == 0)\n",
+    "df_train['basement_sqft'].loc[conditions] = 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "wU6Uohb-PDYB"
+   },
+   "source": [
+    "### #3 The fireplace\n",
+    "There seems to be inconsistency between the `fireplace_flag` and `fireplace_count`\n",
+    "- 90,053 flag values are null\n",
+    "- 80,668 `fireplace_count` values are null\n",
+    "    * 9,385 (-11.5%) difference, but a boatload either way"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 52
+    },
+    "colab_type": "code",
+    "id": "OZM6lXmmpj5k",
+    "outputId": "ecf62d1d-b036-41ad-8052-a3090ae590ef"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "there are 80668 fireplace_count nulls\n",
+      "there are 90053 fireplaceflag nulls\n"
+     ]
+    }
+   ],
+   "source": [
+    "# count the missing entries in each fireplace column, then report both\n",
+    "count_nulls = df_train['fireplace_count'].isna().sum()\n",
+    "flag_nulls = df_train['fireplaceflag'].isna().sum()\n",
+    "print(f\"there are {count_nulls} fireplace_count nulls\\n\"\n",
+    "      f\"there are {flag_nulls} fireplaceflag nulls\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "v9ZAzFoIpkSF"
+   },
+   "source": [
+    "* context driven solutions\n",
+    "  * where neither flag nor count exists, `fireplaceflag == False`\n",
+    "  *   when `fireplace_count` is more than zero `fireplaceflag` should be `True`\n",
+    "  * if `fireplaceflag == False`, the `fireplace_count` is logically `0`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 52
+    },
+    "colab_type": "code",
+    "id": "i3YRZgU_qZhA",
+    "outputId": "e45a7a96-2e1d-47d2-a0bd-48ece42cbb6e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "there are 222 fireplace_count nulls\n",
+      "there are 0 fireplaceflag nulls\n"
+     ]
+    }
+   ],
+   "source": [
+    "# fireplace_count is only written in the last step, so its null mask\n",
+    "# can be taken once up front\n",
+    "count_missing = df_train['fireplace_count'].isna()\n",
+    "\n",
+    "# neither a count nor a flag recorded -> flag is False\n",
+    "conditions = count_missing & df_train['fireplaceflag'].isna()\n",
+    "df_train['fireplaceflag'].loc[conditions] = False\n",
+    "\n",
+    "# a positive count -> flag is True\n",
+    "conditions = df_train['fireplace_count'] > 0\n",
+    "df_train['fireplaceflag'].loc[conditions] = True\n",
+    "\n",
+    "# missing count with a False flag -> count is 0\n",
+    "conditions = count_missing & (df_train['fireplaceflag'] == False)\n",
+    "df_train['fireplace_count'].loc[conditions] = 0\n",
+    "\n",
+    "# report what is still missing after the three rules\n",
+    "count_nulls = df_train['fireplace_count'].isna().sum()\n",
+    "flag_nulls = df_train['fireplaceflag'].isna().sum()\n",
+    "print(f\"there are {count_nulls} fireplace_count nulls\\n\"\n",
+    "      f\"there are {flag_nulls} fireplaceflag nulls\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "pYntUejosOn3"
+   },
+   "source": [
+    "### #4 The garage\n",
+    "*   Properties with no garage have NA values for both `garagecarcnt` and `garage_sqft`, so both are set to 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "L9mGs-mK9E0Q"
+   },
+   "outputs": [],
+   "source": [
+    "garage = ['garagecarcnt', 'garage_sqft']\n",
+    "# rows where neither the car count nor the square footage is recorded\n",
+    "conditions = ((df_train['garagecarcnt'].isna()==True) \n",
+    "              & (df_train['garage_sqft'].isna()==True))\n",
+    "# set both to 0, one column at a time: df_train[garage] builds a separate\n",
+    "# two-column frame, so assigning through it does not reliably reach df_train\n",
+    "for garage_col in garage:\n",
+    "    df_train[garage_col].loc[conditions] = 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "0uV115W6-ohW"
+   },
+   "source": [
+    "Exploring the data further, we see\n",
+    "- `garage_sqft` holds over 8,900 measurements of 0 despite the garage's car count being 1 or more  \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 121
+    },
+    "colab_type": "code",
+    "id": "gbbUIbwJ-ouS",
+    "outputId": "310a4cdf-01a0-4fc3-ed1b-0e2f5e668518"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>garagecarcnt</th>\n",
+       "      <th>garage_sqft</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    garagecarcnt  garage_sqft\n",
+       "18           2.0          0.0\n",
+       "20           1.0          0.0\n",
+       "32           1.0          0.0\n",
+       "36           2.0          0.0\n",
+       "42           1.0          0.0"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# garages that hold at least one car yet report an area of 0\n",
+    "holds_cars = df_train['garagecarcnt'] > 0\n",
+    "zero_area = df_train['garage_sqft'] == 0\n",
+    "conditions = holds_cars & zero_area\n",
+    "\n",
+    "# preview the inconsistent rows, garage columns only\n",
+    "df_train.loc[conditions][garage].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "5I1O76QKA8Cb"
+   },
+   "source": [
+    "- these 0 values need to be null\n",
+    " - because no garage holding 1 or more cars in 2016 measured 0sqft"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "eWVtoty0A9Jt"
+   },
+   "outputs": [],
+   "source": [
+    "# a garage with room for cars cannot measure 0 sqft: mark that area as missing\n",
+    "holds_cars = df_train['garagecarcnt'] > 0\n",
+    "zero_area = df_train['garage_sqft'] == 0\n",
+    "conditions = holds_cars & zero_area\n",
+    "df_train['garage_sqft'].loc[conditions] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "seb6r5wx5Bbz"
+   },
+   "source": [
+    "### #5 The bath\n",
+    "*   `total_bath` & `calculatedbathnbr` are near-duplicates w/ `calculated` having more nulls\n",
+    "  - let's drop it\n",
+    "*   if `full_bath` and `half_bath` are both null and `total_bath` is recorded as 0\n",
+    "  - let's treat `total_bath` as missing (NaN) \n",
+    "      - because we can't truthfully assume the property has no bathrooms "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "EgMNToed5BMu"
+   },
+   "outputs": [],
+   "source": [
+    "# drop calculated bath column\n",
+    "df_train = df_train.drop('calculatedbathnbr', axis=1)\n",
+    "\n",
+    "# if full_bath is null & half_bath is null\n",
+    "conditions = ((df_train['full_bath'].isnull()==True) \n",
+    "              & (df_train['half_bath'].isnull()==True) \n",
+    "              & (df_train['total_bath']==0))\n",
+    "# total_bath=0\n",
+    "df_train.total_bath.loc[conditions] = np.nan\n",
+    "\n",
+    "# when full_bath==total_bath, half_bath=0 \n",
+    "df_train.half_bath.loc[df_train.full_bath == df_train.total_bath] = 0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Sh8cG0pr4_hl"
+   },
+   "source": [
+    "### #6 Mode Imputation \n",
+    "* scaling down the latitude and longitude\n",
+    "  - KNN imputation takes more time with the larger numbers\n",
+    "  - standardizing gives better results on most algorithms\n",
+    "    - this is a competition, we came to win"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "kitrNxKgLWUd"
+   },
+   "outputs": [],
+   "source": [
+    "# shrink both coordinate columns by the same constant factor\n",
+    "for coord in ('latitude', 'longitude'):\n",
+    "    df_train[coord] = df_train[coord] / 100000"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "y6bhRhu5YZ1d"
+   },
+   "source": [
+    "### #7 numberofstories & unitcnt & roomcnt\n",
+    "* we can devise unit count based on property land type\n",
+    "  - so we can now go ahead and correct the unit counts for each given property"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 208
+    },
+    "colab_type": "code",
+    "id": "yHZH4rMNLfBA",
+    "outputId": "97106bb4-10f2-49a9-f821-03a3972db136"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0      86035\n",
+       "2.0       2372\n",
+       "4.0        884\n",
+       "3.0        622\n",
+       "5.0          1\n",
+       "6.0          1\n",
+       "9.0          1\n",
+       "11.0         1\n",
+       "70.0         1\n",
+       "143.0        1\n",
+       "Name: unitcnt, dtype: int32"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# treat a room count of 0 as missing\n",
+    "df_train['roomcnt'].loc[df_train['roomcnt'] == 0] = np.nan\n",
+    "\n",
+    "\"\"\"\n",
+    "propertylandusetypeid & unitcnt are related \n",
+    "  these are the propertylandusetypeid codes & their definitions\n",
+    "  \n",
+    "#246 -Duplex (2 Units, Any Combination)\n",
+    "#247 -Triplex (3 Units, Any Combination)\n",
+    "#248 -Quadruplex (4 Units, Any Combination)\n",
+    "#260 -Residential General\n",
+    "#261 -Single Family Residential\n",
+    "#263 -Mobile Home\n",
+    "#264 -Townhouse\n",
+    "#266 -Condominium\n",
+    "#267 -Cooperative\n",
+    "#269 -Planned Unit Development\n",
+    "#275 -Residential Common Area \n",
+    "#31 - Commercial/Office/Residential Mixed Used\n",
+    "#47 -Store/Office (Mixed Use)\n",
+    "#265 -Cluster Home\n",
+    "\"\"\"\n",
+    "\n",
+    "# land-use codes grouped by the unit count assigned to them\n",
+    "ones = [260,261,263,264,266,267,269,275]\n",
+    "twos = [31,47,246]\n",
+    "threes = [247]\n",
+    "fours = [248]\n",
+    "\n",
+    "# fill unitcnt from the land-use code, but only where it is still null\n",
+    "for units, codes in ((1, ones), (2, twos), (3, threes), (4, fours)):\n",
+    "    for code in codes:\n",
+    "        conditions = ((df_train['propertylandusetypeid'] == code)\n",
+    "                      & (df_train['unitcnt'].isna()))\n",
+    "        df_train['unitcnt'].loc[conditions] = units\n",
+    "\n",
+    "# let's see how our unit counts look\n",
+    "df_train['unitcnt'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "02yLicmxLs3C"
+   },
+   "source": [
+    "### #8 Time to Cut\n",
+    "**Because of the adjustments made so far a number of columns are no longer needed**\n",
+    "*  transaction date column is no longer of use\n",
+    "  - and can be dropped \n",
+    "* `preimeter_living_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+    "  - except that `preimeter_living_area_sqft` has more duplicates\n",
+    "* `total_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+    "  - except that \"total_area_sqft\" has more duplicates\n",
+    "* `total_finished_living_area_sqft` and `finished_living_area_sqft` have the same values \n",
+    "  - except that `finished_living_area_sqft` has more duplicates\n",
+    "* `base_unfinished_and_finished_area_sqft` and `total_finished_living_area_sqft` have the same values \n",
+    "  - except that `base_unfinished_and_finished_area_sqft` has more duplicates\n",
+    "* different counties follow different land use codes\n",
+    "  - to compare different counties, Zillow has created its own `propertylandusetypeid`\n",
+    "    - hence we can drop `propertycountylandusecode`\n",
+    "    - the same applies to `propertyzoningdesc`\n",
+    "* Most zip IDs are either invalid or outside the city\n",
+    "  - since enough information about location is given in latitude and longitude \n",
+    "    - let's drop other location related fields\n",
+    "      - `regionidcity`\n",
+    "      - `regionidzip`\n",
+    "      - `regionidneighborhood`\n",
+    "* `assessmentyear` has a constant value for all rows\n",
+    "  - let's drop it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "OtOgzOqHLyid"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BEFORE: (90275, 60)\n",
+      "AFTER:  (90275, 48)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"BEFORE: {df_train.shape}\")\n",
+    "\n",
+    "# columns that are no longer needed, grouped by theme\n",
+    "cut = [\n",
+    "    # zoning / county land-use descriptors\n",
+    "    'propertyzoningdesc', 'propertycountylandusecode',\n",
+    "    # square-footage variants\n",
+    "    'base_unfinished_and_finished_area_sqft', 'finished_living_area_sqft',\n",
+    "    'total_area_sqft', 'preimeter_living_area_sqft',\n",
+    "    # region identifiers\n",
+    "    'regionidzip', 'regionidcity', 'regionidneighborhood',\n",
+    "    # remaining one-off columns\n",
+    "    'assessmentyear', 'transactiondate', 'censustractandblock',\n",
+    "]\n",
+    "# drop them from the dataframe in one call\n",
+    "df_train = df_train.drop(cut, axis=1)\n",
+    "\n",
+    "print(f\"AFTER:  {df_train.shape}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "icDvpvSD6BSb"
+   },
+   "source": [
+    "### #9 Tax, Year, & Census\n",
+    "-  if the tax delinquency flag is null, assume there is no unpaid tax on the property\n",
+    "  - an issue arises here because `taxdelinquencyflag` is a `StringColumn`\n",
+    "    - i.e. null values indicate no tax delinquency, all other values are `Y` for yes\n",
+    "    - because of this, the normal method of.."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 311
+    },
+    "colab_type": "code",
+    "id": "8lYcO_T5XKNN",
+    "outputId": "596cfad3-890d-4241-b8b8-347673082a7f"
+   },
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "fill_value must be a string or a string series",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-20-739a1ab730d9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# how we'd normally take care of this\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'taxdelinquencyflag'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/anaconda3/envs/rapidsenv/lib/python3.7/site-packages/cudf/core/series.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, value, method, axis, inplace, limit)\u001b[0m\n\u001b[1;32m   1186\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"The axis keyword is not supported\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1187\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1188\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfillna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1189\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1190\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/envs/rapidsenv/lib/python3.7/site-packages/cudf/core/column/string.py\u001b[0m in \u001b[0;36mfillna\u001b[0;34m(self, fill_value, inplace)\u001b[0m\n\u001b[1;32m    719\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_column\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mStringColumn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    720\u001b[0m         ):\n\u001b[0;32m--> 721\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"fill_value must be a string or a string series\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    722\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    723\u001b[0m         \u001b[0;31m# replace fill_value with nvstrings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mTypeError\u001b[0m: fill_value must be a string or a string series"
+     ]
+    }
+   ],
+   "source": [
+    "# how we'd normally take care of this -- kept as a demonstration of the failure\n",
+    "try:\n",
+    "    df_train['taxdelinquencyflag'].fillna(0)\n",
+    "except TypeError as err:\n",
+    "    # a string column rejects the integer fill value; show the message\n",
+    "    # without halting a full \"Run All\"\n",
+    "    print(f\"TypeError: {err}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "tA6xG6h59rLi"
+   },
+   "source": [
+    "- ...comes with error. \n",
+    "  - Why?\n",
+    "    - the series we are trying to fill the null values of is a string series\n",
+    "      - because of this `.fillna()` requires a string value (e.g. '0') instead of an int value (e.g. 0)\n",
+    "  - So, what now?\n",
+    "    - there is an easy and straightforward solution with masked assigning!! \n",
+    "      - First\n",
+    "        - switch 1 (current True, actual False) to -1\n",
+    "      - Then\n",
+    "        - switch 0 (current False, actual True) to 1 to reflect True status\n",
+    "      - Finally\n",
+    "        - switch -1 (old True, actual False) to 0 to reflect False status"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 69
+    },
+    "colab_type": "code",
+    "id": "Svp6J0cJ5dL0",
+    "outputId": "03862711-e104-4954-bf9c-61bd51b3a9e3"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    88492\n",
+       "1     1783\n",
+       "Name: taxdelinquencyflag, dtype: int32"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# .isna() marks the null entries (no delinquency) as True and the 'Y' entries as False\n",
+    "flag = df_train['taxdelinquencyflag'].isna()\n",
+    "df_train['taxdelinquencyflag'] = flag\n",
+    "\n",
+    "# invert the flag through a temporary -1 so that 'Y' ends up as 1 and null as 0\n",
+    "switcharoo = [(1,-1),(0,1),(-1,0)]\n",
+    "for old, new in switcharoo:\n",
+    "    # swap one value at a time, in the listed order\n",
+    "    flag = df_train['taxdelinquencyflag'].replace(old, new)\n",
+    "    df_train['taxdelinquencyflag'] = flag\n",
+    "\n",
+    "# display values in tax delinquency flag column\n",
+    "df_train['taxdelinquencyflag'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "w5EAdWXaCTRU"
+   },
+   "source": [
+    "- Convert years\n",
+    "  - from yy\n",
+    "    - to 2016 - yyyy \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 243
+    },
+    "colab_type": "code",
+    "id": "6Bic66I9LfGC",
+    "outputId": "baaa5387-bbd7-4242-a336-0b6b90606935"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.0     88492\n",
+       "2.0       628\n",
+       "1.0       518\n",
+       "3.0       210\n",
+       "4.0       154\n",
+       "6.0        89\n",
+       "5.0        85\n",
+       "7.0        63\n",
+       "8.0        24\n",
+       "9.0         8\n",
+       "10.0        3\n",
+       "17.0        1\n",
+       "Name: taxdelinquencyyear, dtype: int32"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# rows flagged as not delinquent get a delinquency year of 0\n",
+    "not_delinquent = df_train['taxdelinquencyflag'] == 0\n",
+    "df_train['taxdelinquencyyear'].loc[not_delinquent] = 0\n",
+    "\n",
+    "# short years paired with their four-digit form: 99 -> 1999, 6..15 -> 2006..2015\n",
+    "year_pairs = [(99, 1999)] + [(yy, 2000 + yy) for yy in range(6, 16)]\n",
+    "for old, new in year_pairs:\n",
+    "    # replace old year (e.g. 99) with new year (e.g. 1999)\n",
+    "    df_train['taxdelinquencyyear'].loc[df_train['taxdelinquencyyear'] == old] = new\n",
+    "\n",
+    "# express every non-zero year as a distance from the training year (2016)\n",
+    "has_year = df_train['taxdelinquencyyear'] > 0\n",
+    "df_train['taxdelinquencyyear'].loc[has_year] = 2016 - df_train['taxdelinquencyyear'].loc[has_year]\n",
+    "\n",
+    "# what've we got?\n",
+    "df_train['taxdelinquencyyear'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "ya7xLHzdGVcs"
+   },
+   "source": [
+    "- values in `rawcensustractandblock` represent multiple fields concatenated together as float values\n",
+    "  - by converting those values to string we can split each and build new columns:\n",
+    "    - `census_tractnumber`\n",
+    "    - `block_number`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: scratch cell, kept for reference only. The snapshot/restore pair below\n",
+    "# is disabled: restoring from `ttt` fails with a NameError on a fresh kernel\n",
+    "# because the line that creates `ttt` is commented out. The triple-quoted\n",
+    "# block that follows is never executed (it is only a string literal).\n",
+    "# ttt=df_train.copy()\n",
+    "# df_train=ttt.copy()\n",
+    "\n",
+    "# origional column\n",
+    "\"\"\"\n",
+    "\n",
+    "# both are float columns now\n",
+    "#rawcensustractandblock\n",
+    "s_rawcensustractandblock=df_train.rawcensustractandblock.apply(lambda x: str(x))\n",
+    "\n",
+    "df_train['census_tractnumber']=s_rawcensustractandblock.str.slice(4,11)\n",
+    "df_train['block_number']=s_rawcensustractandblock.str.slice(start=11)\n",
+    "df_train['block_number']=df_train['block_number'].apply(lambda x: x[:4]+'.'+x[4:]+'0' )\n",
+    "df_train['block_number']=df_train['block_number'].apply(lambda x: int(round(float(x),0)) )\n",
+    "df_train['block_number']=df_train['block_number'].apply(lambda x: str(x).ljust(4,'0') )\n",
+    "\n",
+    "#droping censustractandblock since this is just a duplicate of rawcensustractandblock\n",
+    "df_train=df_train.drop('censustractandblock', axis=1)\n",
+    "\n",
+    "# drooping rawcensustractandblock, since it's already stored as substrings in different column names\n",
+    "df_train=df_train.drop('rawcensustractandblock', axis=1)\n",
+    "\n",
+    "\"\"\"\n",
+    "pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 489
+    },
+    "colab_type": "code",
+    "id": "Sg0eN-K1QdZy",
+    "outputId": "a90de47f-5c88-4834-df44-75a9dedcd07c"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>census_tractnumber</th>\n",
+       "      <th>block_number</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0053.03</td>\n",
+       "      <td>2043</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4037.03</td>\n",
+       "      <td>2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0320.48</td>\n",
+       "      <td>3000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3107.02</td>\n",
+       "      <td>1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5303.01</td>\n",
+       "      <td>2001</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  census_tractnumber block_number\n",
+       "0            0053.03         2043\n",
+       "1            4037.03         2000\n",
+       "2            0320.48         3000\n",
+       "3            3107.02         1000\n",
+       "4            5303.01         2001"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# copy rawcensustractandblock with values as string instead of float\n",
+    "string_data = cudf.Series(df_train['rawcensustractandblock'].values_to_string())\n",
+    "\n",
+    "# print(type(string_data))\n",
+    "# print(len(string_data))\n",
+    "# print(string_data)\n",
+    "\n",
+    "# \"\"\"\n",
+    "# CURRENT ERROR IN CONVERSION OF VALUES\n",
+    "# \"\"\"\n",
+    "# print(f\"\\nNOTE: THERE APPEARS TO BE AN ERROR WHEN CONVERTING TO STRING\\n\"\n",
+    "#       f\"  > somewhat random numbers added to end of some values\\n    >> e.g. 004, 006\"\n",
+    "#       f\"\\n\\n\\ndf_train['rawcensustractandblock'].head(10).values\\n\"\n",
+    "#       f\"{df_train['rawcensustractandblock'].head(10).values}\\n\\n\"\n",
+    "#       f\"data.head(10).values\\n{string_data.head(10).values}\\n\\n\\n\"\n",
+    "#       f\"THE SAME NUMBERS OCCOUR IN THE FIRST WHEN PUT INTO A LIST\\n\"\n",
+    "#       f\"  > not sure how to deal with this now\\n\"\n",
+    "#       f\"    >> difficult to reproduce without data\\n\\n\")\n",
+    "# \"\"\"\n",
+    "# CURRENT ERROR IN CONVERSION OF VALUES\n",
+    "# \"\"\"\n",
+    "\n",
+    "# set new tract number \n",
+    "df_train['census_tractnumber'] = string_data.str.slice(4, 11)\n",
+    "\n",
+    "# set/adjust block number\n",
+    "df_train['block_number'] = string_data.str.slice(11)\n",
+    "df_train['block_number'] = df_train.block_number.str.slice(0,4).str.cat(df_train.block_number.str.slice(4), '.')\n",
+    "df_train['block_number'] = df_train.block_number.astype('float').round(0).astype('int')\n",
+    "df_train['block_number'] = df_train.block_number.astype('str').str.ljust(4, '0')\n",
+    "\n",
+    "# drop raw census tract and block column, no longer needed\n",
+    "df_train = df_train.drop('rawcensustractandblock', axis=1)\n",
+    "\n",
+    "\"\"\"\n",
+    "CORRECT NUMBERS THAT SHOULD BE DISPLAYED BY BELOW PRINT STATEMENT\n",
+    "  > currently not being seen due to prior mentioned error\n",
+    "\n",
+    "tractnumber\n",
+    "0    1066.46\n",
+    "1    0524.22\n",
+    "2    4638.00\n",
+    "3    2963.00\n",
+    "4    0423.38\n",
+    "dtype: object\n",
+    "\n",
+    "blocknumber\n",
+    "0    1001\n",
+    "1    2024\n",
+    "2    3004\n",
+    "3    2002\n",
+    "4    1006\n",
+    "dtype: object\n",
+    "\"\"\"\n",
+    "df_train[['census_tractnumber', 'block_number']].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "T71orw51lpTN"
+   },
+   "source": [
+    "## Dealing with Missing Values\n",
+    "### #1 Setting standards\n",
+    "- Despite correcting and adjusting the data to this point, there are still some columns holding a large majority of null values\n",
+    "- For some columns, this majority represents over 95% of values\n",
+    "  - Let's identify those columns\n",
+    "    - And drop columns with more than 95% null values  \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 86
+    },
+    "colab_type": "code",
+    "id": "xhCosNpXvTVU",
+    "outputId": "2d969756-decb-4912-94f6-19836eb0323a"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                       field  percentage\n",
+      "7        buildingclasstypeid    0.999823\n",
+      "3   architecturalstyletypeid    0.997109\n",
+      "33    typeconstructiontypeid    0.996688\n"
+     ]
+    }
+   ],
+   "source": [
+    "# calculate null value % for each column & frame it\n",
+    "missingvalues_prop = (df_train.isnull().sum()/len(df_train)).reset_index()\n",
+    "missingvalues_prop.columns = ['field','percentage']\n",
+    "\n",
+    "# sort by null values percentage, from highest % to lowest\n",
+    "missingvalues_prop = missingvalues_prop.sort_values(by='percentage', \n",
+    "                                                    ascending=False)\n",
+    "# identify columns with > 95% of values null\n",
+    "missingvaluescols = missingvalues_prop.loc[missingvalues_prop['percentage'] > 0.95]\n",
+    "\n",
+    "# display columns with highest % null values\n",
+    "print(missingvaluescols)\n",
+    "\n",
+    "# drop columns with more than 95% null values\n",
+    "df_train = df_train.drop(missingvaluescols['field'], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "az6t2ntBCMRe"
+   },
+   "source": [
+    "### #2 Working with Remaining Values\n",
+    "- the majority of values still missing in `unitcnt` are in rows where `propertylandusetypeid` = 265, \n",
+    "  - which is Cluster Home (i.e. group of houses with shared walls)\n",
+    "    - each cluster is anywhere between 5 to 25 units\n",
+    "      - here we will assume 10 units as a reasonable count"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 225
+    },
+    "colab_type": "code",
+    "id": "yB2lzAyopS_S",
+    "outputId": "db6c7add-5452-4535-8948-a426654851b7"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0      86035\n",
+       "2.0       2372\n",
+       "4.0        884\n",
+       "3.0        622\n",
+       "10.0       356\n",
+       "5.0          1\n",
+       "6.0          1\n",
+       "9.0          1\n",
+       "11.0         1\n",
+       "70.0         1\n",
+       "143.0        1\n",
+       "Name: unitcnt, dtype: int32"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# highly related propertylandusetypeid\n",
+    "df_train['unitcnt'].loc[df_train['propertylandusetypeid'] == 265] = 10\n",
+    "\n",
+    "# let's see what we've got\n",
+    "df_train['unitcnt'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "iR1rBlz-dOdH"
+   },
+   "source": [
+    "- a number of pool sizes are null despite there being a pool\n",
+    "  - let's calculate the average pool size\n",
+    "    - and assume those null values are pools of average size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "colab_type": "code",
+    "id": "-icFDeLSoJwl",
+    "outputId": "b1ed39c3-3a14-4dc1-eb48-b3429da5cffe"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "16932\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how's it look before?\n",
+    "print(df_train.pool_sqft.isna().sum())\n",
+    "\n",
+    "# calculate the average pool square footage for properties with a pool(s)\n",
+    "poolsizesum_mean = df_train.pool_sqft.loc[df_train['pool_count'] > 0].mean()\n",
+    "\n",
+    "# where the property has a pool(s) but pool square feet is null\n",
+    "conditions = ((df_train['pool_count'] > 0) \n",
+    "              & (df_train['pool_sqft'].isna()==True))\n",
+    "\n",
+    "# set pool square feet to the average pool square footage of pool properties\n",
+    "df_train['pool_sqft'].loc[conditions] = poolsizesum_mean\n",
+    "\n",
+    "# display new null count\n",
+    "df_train.pool_sqft.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "AyGeXJfEmJBU"
+   },
+   "source": [
+    "- total parcel tax\n",
+    "- structure tax\n",
+    "- land tax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "393"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how many rows have values in total parcel tax that do not add up given land tax and structure tax\n",
+    "len(df_train.loc[df_train['total_parcel_tax'] != df_train['land_tax'] + df_train['structure_tax']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6\n",
+      "380\n",
+      "1\n",
+      "1\n",
+      "\n",
+      "6\n",
+      "380\n",
+      "1\n",
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df_train.total_property_tax_2016.isnull().sum())\n",
+    "print(df_train.structure_tax.isnull().sum())\n",
+    "print(df_train.total_parcel_tax.isnull().sum())\n",
+    "print(df_train.land_tax.isnull().sum())\n",
+    "print()\n",
+    "\n",
+    "# where land tax is not a null value\n",
+    "condition_1 = df_train.land_tax.isnull() == False\n",
+    "# where total parceltax is not a null value\n",
+    "condition_2 = df_train.total_parcel_tax.isnull()==False\n",
+    "\n",
+    "# pull the total parcel tax column\n",
+    "total_parcel_tax_not_null = df_train.loc[condition_1 & condition_2, 'total_parcel_tax']\n",
+    "# pull the land tax column\n",
+    "land_tax_not_null = df_train.loc[condition_1 & condition_2, 'land_tax']\n",
+    "\n",
+    "# total_parcel_tax = structure_tax + land_tax\n",
+    "# -> structure_tax = total_parcel_tax - land_tax\n",
+    "correct_structure_tax = total_parcel_tax_not_null - land_tax_not_null\n",
+    "\n",
+    "# set the structure_tax values in rows where total and land taxes are not null to these correct values \n",
+    "df_train['structure_tax'].loc[condition_1 & condition_2] = correct_structure_tax\n",
+    "\n",
+    "# where structure tax is still 0, there isn't structure tax\n",
+    "df_train['structure_tax'].loc[df_train['structure_tax'] == 0] = np.nan\n",
+    "\n",
+    "print(df_train.total_property_tax_2016.isnull().sum())\n",
+    "print(df_train.structure_tax.isnull().sum())\n",
+    "print(df_train.total_parcel_tax.isnull().sum())\n",
+    "print(df_train.land_tax.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "380"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# how many rows have values in total parcel tax that do not add up given land tax and structure tax\n",
+    "len(df_train.loc[df_train['total_parcel_tax'] != df_train['land_tax'] + df_train['structure_tax']])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "colab_type": "code",
+    "id": "8SID48LOpYvu",
+    "outputId": "6d20a3ba-4360-4554-908d-f6d673aece12"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(90275, 45)"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
     }
-  ]
-}
\ No newline at end of file
+   ],
+   "source": [
+    "# regionidcounty is an exact copy of the fips code, dropping the duplicate column\n",
+    "df_train = df_train.drop(['regionidcounty'], axis=1)\n",
+    "df_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 34
+    },
+    "colab_type": "code",
+    "id": "tWmM2J8_pkg1",
+    "outputId": "6362e07f-e363-4884-b0c5-9380b5fee956"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1421\n",
+      "0\n",
+      "0\n",
+      "1421\n"
+     ]
+    }
+   ],
+   "source": [
+    "#*******************************\n",
+    "#bedroomcnt #1421 zero bed room houses ??, observed it's missing all other room count also missing\n",
+    "# where there is no bedroom, null is a better representation \n",
+    "\n",
+    "# before\n",
+    "print(len(df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0]))\n",
+    "print(df_train.bedroomcnt.isnull().sum())\n",
+    "\n",
+    "df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0] = np.nan\n",
+    "\n",
+    "# after\n",
+    "print(len(df_train['bedroomcnt'].loc[df_train['bedroomcnt'] == 0]))\n",
+    "print(df_train.bedroomcnt.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Room Count\n",
+    "calculate full bath and half bath again from total bath as it has a few extra columns (fixes 500 missing values in roomcnt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 208
+    },
+    "colab_type": "code",
+    "id": "3qnP2L9LpmeJ",
+    "outputId": "c0eabce4-3232-4435-8733-779526f18c57"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1165\n",
+      "1182\n",
+      "1182\n",
+      "1421\n",
+      "69700\n",
+      "\n",
+      "1165\n",
+      "1182\n",
+      "1182\n",
+      "1421\n",
+      "1416\n"
+     ]
+    }
+   ],
+   "source": [
+    "# propertylandusetypeid & total living area\n",
+    "#                              total_bath           1165\n",
+    "#                              full_bath           1182\n",
+    "#                              half_bath           1182\n",
+    "#                                bedroomcnt      1421\n",
+    "#                              roomcnt           1416\n",
+    "\n",
+    "print(df_train.total_bath.isna().sum())\n",
+    "print(df_train.full_bath.isnull().sum())\n",
+    "print(df_train.half_bath.isnull().sum())\n",
+    "print(df_train.bedroomcnt.isnull().sum())\n",
+    "print(df_train.roomcnt.isnull().sum())\n",
+    "print()\n",
+    "\n",
+    "# roomcnt = (full_bath + half_bath) + bedroomcnt\n",
+    "# total_bath = fullbath+ 0.5(half_bath)\n",
+    "\n",
+    "# where full & half bath and bedroom count are not null, but room count is null\n",
+    "conditions = ((df_train['full_bath'].isna() == False) \n",
+    "              & (df_train['half_bath'].isna() == False) \n",
+    "              & (df_train['bedroomcnt'].isna() == False) \n",
+    "              & (df_train['roomcnt'].isna() == True))\n",
+    "\n",
+    "# calculate room count including all full & half baths along with bedroom count\n",
+    "new_values = df_train.full_bath.loc[conditions] + df_train.half_bath.loc[conditions] + df_train.bedroomcnt.loc[conditions]\n",
+    "\n",
+    "# df_train['roomcnt'] = df_train['roomcnt'].masked_assign(new_values, conditions)\n",
+    "df_train.roomcnt.loc[conditions] = new_values\n",
+    "\n",
+    "\n",
+    "# most bedroom count and roomcount null are in same place\n",
+    "# all column null count 1133 all columns are null\n",
+    "\n",
+    "print(df_train.total_bath.isna().sum())\n",
+    "print(df_train.full_bath.isnull().sum())\n",
+    "print(df_train.half_bath.isnull().sum())\n",
+    "print(df_train.bedroomcnt.isnull().sum())\n",
+    "print(df_train.roomcnt.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Mvy51Ckev9CX"
+   },
+   "source": [
+    "- correct number of stories by Zillow's `propertylandusetypeid` indicator\n",
+    "  - where values are not null\n",
+    "    - number of stories can be set to mode\n",
+    "  - where there are null values\n",
+    "    - number of stories can be set to the generally accepted number of stories"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 260
+    },
+    "colab_type": "code",
+    "id": "IW4CG2InpolD",
+    "outputId": "02375307-54e2-432b-8b87-1397c73d56b2"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BEFORE\n",
+      "1.0    12016\n",
+      "2.0     8044\n",
+      "3.0      508\n",
+      "4.0        2\n",
+      "Name: numberofstories, dtype: int32\n",
+      "69705 remaining null values\n",
+      "\n",
+      "AFTER\n",
+      "1.0    20154\n",
+      "2.0      423\n",
+      "3.0        4\n",
+      "Name: numberofstories, dtype: int32\n",
+      "69694 remaining null values\n"
+     ]
+    }
+   ],
+   "source": [
+    "# before (what's it look like?)\n",
+    "print(f'BEFORE\\n{df_train.numberofstories.value_counts()}\\n'\n",
+    "      f'{df_train.numberofstories.isnull().sum()} remaining null values\\n')\n",
+    "\n",
+    "#numberofstories\t69705\n",
+    "\n",
+    "# store ids and general number of stories \n",
+    "zillow_type_ids = [(31,2), (246,2), (247,2), (248,2), (260,2), (261,1), \n",
+    "                   (263,1), (266,1), (267,1), (269, 2), (275,1)]\n",
+    "\n",
+    "# go through each (property type id, typical story count) pair\n",
+    "for t_id, n_stories in zillow_type_ids:\n",
+    "    # when type id matches and story count is not null\n",
+    "    conditions = ((df_train['propertylandusetypeid'] == t_id) \n",
+    "                  & (df_train['numberofstories'].isna() == False))\n",
+    "\n",
+    "    # tally the recorded story counts for matching id properties\n",
+    "    # (value_counts() lists the most frequent value first)\n",
+    "    story_counts = df_train.numberofstories.loc[conditions].value_counts()\n",
+    "\n",
+    "    # when there is at least one value in the value_counts of this property type\n",
+    "    if len(story_counts) > 0:\n",
+    "        # the mode is the first index label; story_counts[0] would be that\n",
+    "        # value's frequency (a row count), not a number of stories\n",
+    "        mode_stories = story_counts.index[0]\n",
+    "    # otherwise\n",
+    "    else:\n",
+    "        # fall back to the typical story count for this property type\n",
+    "        mode_stories = n_stories\n",
+    "\n",
+    "    # and set those non null values to the most common value seen\n",
+    "    df_train['numberofstories'].loc[conditions] = mode_stories\n",
+    "\n",
+    "    # when type id matches and story count is null\n",
+    "    # (must test isna() == True here: reusing the not-null mask would overwrite\n",
+    "    # the recorded values again instead of filling the gaps)\n",
+    "    conditions = ((df_train['propertylandusetypeid'] == t_id) \n",
+    "                  & (df_train['numberofstories'].isna() == True))\n",
+    "    # set null values to the common number of stories seen in that type id\n",
+    "    df_train['numberofstories'].loc[conditions] = n_stories\n",
+    "\n",
+    "# edge cases\n",
+    "conditions = ((df_train.propertylandusetypeid==264) \n",
+    "              & (df_train.numberofstories.isnull()))\n",
+    "df_train.numberofstories.loc[conditions] = 2\n",
+    "\n",
+    "# what's it looking like? \n",
+    "print(f'AFTER\\n{df_train.numberofstories.value_counts()}\\n'\n",
+    "      f'{df_train.numberofstories.isnull().sum()} remaining null values')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 295
+    },
+    "colab_type": "code",
+    "id": "AHcMsDCxprd4",
+    "outputId": "30481b2c-e035-4478-d62f-63e10a09c17e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "BEFORE\n",
+      "0.0    80446\n",
+      "1.0     8165\n",
+      "2.0     1106\n",
+      "3.0      312\n",
+      "4.0       21\n",
+      "5.0        3\n",
+      "Name: fireplace_count, dtype: int32\n",
+      "222 remaining null values\n",
+      "\n",
+      "AFTER\n",
+      "0.0       80446\n",
+      "8165.0     9607\n",
+      "1.0         222\n",
+      "Name: fireplace_count, dtype: int32\n",
+      "0 remaining null values\n"
+     ]
+    }
+   ],
+   "source": [
+    "# before (what's it looking like?) \n",
+    "print(f'BEFORE\\n{df_train.fireplace_count.value_counts()}\\n'\n",
+    "      f'{df_train.fireplace_count.isnull().sum()} remaining null values\\n')\n",
+    "\n",
+    "# where there is a fire place, and count is not null\n",
+    "conditions = ((df_train.fireplaceflag==1) \n",
+    "              & (df_train.fireplace_count.isna() == False))\n",
+    "# calculate the mode fireplace count: value_counts() lists the most frequent\n",
+    "# value first, so the mode is its first index label (indexing the result with\n",
+    "# [0] would return that value's frequency, i.e. a row count, instead)\n",
+    "mode_fire_count = df_train.loc[conditions, 'fireplace_count'].value_counts().index[0]\n",
+    "# and set those non null values to the most common fireplace count\n",
+    "df_train['fireplace_count'].loc[conditions] = mode_fire_count\n",
+    "\n",
+    "# where there is a fire place, and count is null\n",
+    "conditions = ((df_train.fireplaceflag==1) \n",
+    "              & (df_train.fireplace_count.isna() == True))\n",
+    "# set null values to the most common fireplace count\n",
+    "df_train.fireplace_count.loc[conditions] = mode_fire_count\n",
+    "\n",
+    "# after\n",
+    "print(f'AFTER\\n{df_train.fireplace_count.value_counts()}\\n'\n",
+    "      f'{df_train.fireplace_count.isnull().sum()} remaining null values')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 317
+    },
+    "colab_type": "code",
+    "id": "FIuSWoJspt3H",
+    "outputId": "cb11c3a1-1658-4bce-cbde-a1a47ccdc0a8"
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEsCAYAAAD6lXULAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deVgT594+8DsEgqggBgHjcqriAVMVURBcsFYURYxrbVXcrVqta9UqKq94UPRg8eC+tPbVWqvnqFVRRMXlfWvrcelpa62ivsrBlQCagODGkszvD3/kGFkMyCQE7891cV1knknmO0+We+bJZEYiCIIAIiIikdhYugAiIqreGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCoGzVto7dq18PLyKvY3ZswYS5dWrSUlJSEkJAStWrVCcHBwifPcvn3b6DlRKpV4//33sWjRImRlZZV7mYcPH8aBAweKTR82bBg+++yzcj8elc/Fixexbt06S5dhcbaWLoAsw9HREVu2bCk2jcRRWFiIefPmISgoCEuXLkWtWrXKnH/+/Pnw8fGBTqdDSkoK4uLioFar8dVXX5VruYcPH8bTp08xYMCANymfKujixYvYsGEDpk6daulSLIpB85aSSqXw8fExef7nz5+jRo0aIlZUvaWnp+Pp06fo168f/Pz8Xjt/s2bNDM+Pr68vnj9/juXLl1v8ebD08sk6ceiMiiksLISXlxe++eYbLF26FB06dDDaIk5KSsKgQYPQunVrBAYGIjY2FoWFhUaPkZiYiJ49e8Lb2xsjR47EpUuX4OXlhfj4eKNl7Nq1y+h+cXFx6Ny5s9G0+/fvY+bMmWjfvj3atGmD8ePH49atW4b2ouGmY8eOISIiAr6+vnjvvfewbt06vHrii2vXrmHixInw9fVF27Zt8dFHH+Hs2bMoKChAp06dsHHjxmL9MXToUMyYMaPMPjt8+DBUKhVatWqF999/H6tXr4ZOpwMA7NmzB927dwcATJw4EV5eXtiwYUOZj/eqWrVqQa/XQ6/XG6Z9//33GDp0KPz9/eHv74/Ro0fjypUrhvY5c+bg5MmTOHv2rGEo7tXlHjhwAD169EC7du0wYcIEZGRkGNqK+vXw4cOYM2cO/Pz8MGXKFACATqfDqlWr0LVrV7Rq1QoqlQqHDx8uV78U9Y2XlxeuXr2K4cOHo02bNhg4cCCuXr2KJ0+eYN68eWjXrh169OiBxMTE1/aTTqfDxo0b0bNnT7Rq1QrvvfceFixYYDTP9u3bERwcjFatWqFnz57Yvn27UfucOXPw0UcfGU0r6ovTp08D+M/rd8eOHYiNjUVAQAA6deqEJUuWID8/37Buy5cvh06ne+uHp7lH8xZ7NRykUikkEonh9ldffYWAgACsWLHC8IF96NAhzJ07F8OGDcOsWbNw+/ZtrFy5EsCLNygAXLp0CbNnz0avXr0QERGBa9euYebMmRWqUavVYtiwYahXrx6ioqJgb2+PzZs3Y9y4cTh69ChkMplh3piYGPTq1Qtr1qzBTz/9hLVr18LT0xM9e/YEANy4cQPDhg2Dh4cHoqKiUKdOHVy+fBlqtRp2dnbo378/9u/fj8mTJxse89atW/jtt9/w5ZdfllrjDz/8gFmzZmHQoEGYO3curl27hjVr1uDRo0dYtGgRunfvDkdHR8yYMcMwJKZQKMpcb0EQUFhYCL1ej5s3b2Lr1q3o3LkzatasaZgnLS0NgwYNQuPGjZGfn4+DBw9i+PDhOHz4MBo2bIjp06cjPT0dz58/R0REBAAYLffXX39Feno65s+fj6dPn2LZsmWIjIzEpk2bjGpZvny5oV9tbF5sm/7tb3/DN998g6lTp6Jly5Y4cuQIZs2aBRsbG/Tu3dukfnnZ3LlzMWLECEycOBGxsbGYMWMGWrRogWbNmmHt2rXYvXs35s6dC19fX7i7u5fabwsXLkRCQgI
mTJgAPz8/ZGdn48SJE4b2nTt3YtmyZRg7diw6d+6Ms2fPYtmyZSgoKMDHH39c5nNSki1btqBTp06IjY3F1atXERcXh0aNGmHs2LHo3r07bty4gR07dmDnzp0A3uLhaYHeOmvWrBE8PT2L/Z05c0YQBEEoKCgQPD09hUGDBhndT6fTCV26dBEWLlxoNP3vf/+74O3tLWRnZwuCIAhTpkwRVCqVoNfrDfOsXbtW8PT0FA4cOGC0jJ07dxo91t/+9jehU6dOhtuxsbFCQECA8OjRI8M0rVYr+Pj4CLt27RIEQRBu3boleHp6CuHh4UaP1adPH2H27NmG29OmTRO6du0qPH/+vMR+uXnzpuDp6Sn8/PPPhmkrV64UAgMDhcLCwhLvIwiCMHDgQGHMmDFG0zZu3CgolUohIyPDqMYffvih1Md5eb5X/1QqlZCenl7q/XQ6nVBQUCD06NFD2Lhxo2H65MmThdGjRxebf+jQoYKfn5+Qk5NjmLZlyxbBy8tLyMvLM6pl2rRpRvfVaDRC69athQ0bNhhNHzt2rBAaGlquftm9e7fg6ekpxMfHG+Y5ceKE4OnpKURERBimZWdnCy1atBD+8Y9/lNoH169fFzw9PYUdO3aU2F5QUCB06tSp2Os3IiJC8PPzM6z37NmzhQ8//NBonlefv6LX78iRI43mmzhxojB06FDD7a1btwpKpbLUmt8WHDp7Szk6OmLv3r1Gf97e3kbzvP/++0a3U1JSkJGRgd69e6OwsNDw16FDBzx//hw3b94E8GKPJigoyGjvqLSjrF7nn//8JwIDA1GzZk3D8hwdHdGyZUtcvnzZaN7AwECj2x4eHkZDQefPn0efPn1gb29f4rI8PDzQtm1b7Nu3DwCg1+sRHx+PAQMGQCqVlnifgoICXLt2DSEhIUbTQ0NDodPp8Pvvv5d7nQEgIiICe/fuxZ49e7Bu3TrUqFEDEydOxLNnzwzz3LhxA59++ik6deoEpVKJli1b4s6dO0bDimXx9vY22sJu3rw5BEFAZmam0Xyvvg6uX7+OvLy8Yuvcu3dv3Lx5E9nZ2eXul44dOxr+f+eddwAAHTp0MEyrU6cOnJ2djZ7PV50/fx4AMHDgwBLb1Wo1Hj58WGJNOTk5htdvebzuNUcvcOjsLSWVStG6desy53FxcTG6XXR47bhx40qcPz09HQCg0WiK3ffV26bKysrC5cuXcejQoWJtrwbGq8MSdnZ2yMvLM9x+9OgRXF1dy1ze4MGDER0djYiICPzyyy9IT0/HoEGDSp1fq9VCp9OVur7Z2dllLq8077zzjtHz4+Pjgy5duuDAgQMYNmwYcnNzMW7cOLi7u2P+/PlQKBSwt7fHggULjNa5LE5OTka37ezsAKDY/V9dtwcPHgAA6tWrZzS96HZOTg7y8vLK1S8v11JUx+uez1dlZ2fD0dHRaHjxZUUB+mrdRTU9evSo1McuTXlrfFsxaKhUL++RAC+2KgFg2bJl8PT0LDZ/48aNAbx442o0GqO2V29LpVLY2tqioKDAaPqrb3ZnZ2e8++67+OSTT4otr3bt2iauyX/qL/qQLE1oaCiWLVuGpKQknD59Gu3atUPTpk1LnV8ul0MqlUKr1RpNL1pfZ2fnctVYGldXV9SpUwcpKSkAXny/kpmZiR07dhj2AIAXH/KV7dXXQVFYazQaow/ahw8fAngRGrVq1TJLv7zM2dkZubm5ePbsGRwcHIq1u7m5GdXwak1Fr297e/vXvi6pfDh0RiZr3rw5XF1dcf/+fbRu3brYX9GHR+vWrXHq1CmjI76OHz9u9FgSiQTu7u6GD07gxRFD586dM5qvQ4cOuHHjBry8vIotr6wAKEmHDh2QmJhoOCqoJDVr1kRoaCi+/fZbnDhxosy9GeDFFqxSqcTRo0eNph85cgRSqRRt2rQpV42lycjIQHZ2tuHL/OfPnwOA0cEQP//8s2Gv8uX6KnsL28vLC/b29sXW+ejRo2jevDmcnZ3N1i8vKxp
+K+kHqgDQoEED1KtXr8Sa6tSpg+bNmwMA6tevj3v37hm9Ts6cOVOhmuzs7KDT6YodePO24R4NmUwqlWLu3LlYsGABcnJy0KVLF9ja2uLu3bs4fvw4Nm7cCJlMhgkTJmDo0KGYNWsWBg4ciOvXrxu+93hZjx49sHv3brRo0QINGjTAnj17DB+gRT7++GMkJCRg1KhRGDFiBNzc3PDw4UNcuHAB/v7+CA0NNbn+6dOnY/DgwRgxYgTGjBkDZ2dnXLlyBfXq1TMa1x88eDCGDBmCmjVrGo6get3jTpw4EQsXLkRISAiuXbuGtWvXYujQoYat6PL697//DScnJwiCgPT0dGzZsgVOTk6G9W3bti0cHBwQERGBcePGIS0tDevXry+2vGbNmuH06dM4ceIE3N3d4e7uXuGaisjlcowcORLr1q2DjY0N3n33XRw9ehQ//fQTVq1aZZhPjH4pS/PmzfHBBx8gOjoaDx8+hK+vLx49eoQTJ05g5cqVkEqlmDJlCqKiouDk5ISOHTvi/Pnz2L17Nz7//HNDaAcHB2PdunWIiIjAgAEDcPny5VLD63WaNWsGANi2bRv8/f3h6OhY7g2k6oBBQ+XSr18/ODk5YfPmzdi7dy9sbGzwpz/9Cd26dYOt7YuXk4+PD1auXIm4uDicOHEC3t7eiIuLK/bbhOnTpyMrKwtxcXGws7PDyJEj4eHhgb179xrmcXFxwe7duxEXF4dly5YhJycHbm5u8PX1hZeXV7lq9/DwwM6dOxEbG4uFCxdCIpHgz3/+c7FTsfj4+KBevXro0qWLScNzXbt2xcqVK7Fp0ybEx8dDLpdj/PjxmDZtWrnqe9ny5csN/9erVw+tW7dGdHS0YY/Gzc0Nq1evRkxMDCZNmoSmTZsiKiqq2O+ARowYgevXr2P+/PnIycnBjBkz8Omnn1a4riKfffYZ7OzssGPHDmi1WjRp0gQrV640CmYx+uV1lixZgoYNG2Lfvn3YvHkzXFxc0KVLF0N7WFgYCgoK8O233+Kbb76BQqHA/PnzMXr0aMM8LVq0wNKlS7F582YkJSWhQ4cOiI6OxvDhw8tdT4cOHTB27Fhs27YNsbGx6NChA7Zt21YZq2pVJILASzmT+HJyctC+fXusWLEC/fv3t3Q5Zbp27Rr69++Pb7/9Fv7+/pYuh8jqcY+G6P/TarVITU3FqlWr0KJFC4YMUSXhwQBE/9/JkycxfPhwaLVao6ErInozHDojIiJRcY+GiIhExaAhIiJR8WCAUmRlPYFez1FFIiJT2NhIULduyRf0Y9CUQq8XGDRERJXAbENnn376Kfr164cBAwYgLCwMV69eBQCkpqZiyJAh6NWrF4YMGWJ05lkx2oiIyLzMdtRZbm6u4QR8J06cwPr167F//36MGjUKH3zwAfr374/4+Hh8//33hiveidFmKo3mMfdoiIhMZGMjgYtLyWfSMNsezctneX38+DEkEgk0Gg2Sk5OhUqkAACqVCsnJydBqtaK0ERGR+Zn1O5qFCxfizJkzEAQBW7ZsgVqthru7u+GiUlKpFG5ublCr1RAEodLb5HK5ybWWlsxERFQ+Zg2a6OhoAC9O471ixQrMmDHDnIsvFw6dERGZrkoMnb1swIABOH/+POrXr4+MjAzodDoAL65HkpmZCYVCAYVCUeltRERkfmYJmidPnkCtVhtunzp1CnXq1IGLiwuUSiUSEhIAAAkJCVAqlZDL5aK0ERGR+ZnlqLOHDx/i008/xbNnz2BjY4M6depg3rx5aNmyJVJSUhAeHo6cnBw4OTkhJibGcLEgMdpMxaEzIiLTlTV0xpNqloJB83ZzdpTBroa9pctAwfM8ZOeWfulpoqqirKDhmQGISmBXwx6Jo8ZaugyEbt8KMGjIyvGkmkREJCoGDRERiYpBQ0REomLQEBGRqBg0REQkKgYNERGJikFDRESiYtAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCoGDRERiYpBQ0R
EomLQEBGRqBg0REQkKgYNERGJikFDRESiYtAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCpbcywkKysLc+fOxZ07dyCTyfDOO+8gKioKcrkcQUFBkMlksLe3BwDMmTMHXbp0AQCkpqYiPDwc2dnZcHZ2RkxMDJo0afJGbUREZF5m2aORSCQYP348jh07hkOHDqFx48aIjY01tK9Zswbx8fGIj483hAwAREZGIiwsDMeOHUNYWBgWLVr0xm1ERGReZgkaZ2dnBAQEGG77+PggLS2tzPtoNBokJydDpVIBAFQqFZKTk6HVaivcRkRE5meWobOX6fV67Nq1C0FBQYZpc+bMgSAI8PX1xaxZs+Dk5AS1Wg13d3dIpVIAgFQqhZubG9RqNQRBqFCbXC43uU4Xl9qVuNZEFefq6mjpEojeiNmDZsmSJahZsyZGjBgBAPjuu++gUCiQn5+P6OhoREVFGQ2rWYpG8xh6vWDpMshCqtKH+4MHuZYugei1bGwkpW6gm/Wos5iYGNy+fRurVq2Cjc2LRSsUCgCATCZDWFgYfv31V8P0jIwM6HQ6AIBOp0NmZiYUCkWF24iIyPzMFjRxcXG4fPky1q9fD5lMBgB4+vQpcnNfbK0JgoDExEQolUoAgIuLC5RKJRISEgAACQkJUCqVkMvlFW4jIiLzkwiCIPr40I0bN6BSqdCkSRPUqFEDANCoUSOEh4dj2rRp0Ol00Ov18PDwQEREBNzc3AAAKSkpCA8PR05ODpycnBATE4NmzZq9UZupOHT2dnN1dUTiqLGWLgOh27dy6IysQllDZ2YJGmvEoHm7MWiIyqfKfEdDRERvHwYNERGJyuyHN5N46taRwVZmb9EaCvPzkPUo36I1EFHVwqCpRmxl9vhlxXiL1uA7dwsABg0R/QeHzoiISFQMGiIiEhWDhoiIRMWgISIiUTFoiIhIVAwaIiISFYOGiIhExaAhIiJRMWiIiEhUDBoiIhIVg4aIiETFoCEiIlExaIiISFQMGiIiEhWDhoiIRMWgISIiUTFoiIhIVAwaIiISFYOGiIhExaAhIiJRMWiIiEhUDBoiIhKVWYImKysLEyZMQK9evdC3b19MnToVWq0WAJCamoohQ4agV69eGDJkCG7dumW4nxhtRERkXmYJGolEgvHjx+PYsWM4dOgQGjdujNjYWABAZGQkwsLCcOzYMYSFhWHRokWG+4nRRkRE5mWWoHF2dkZAQIDhto+PD9LS0qDRaJCcnAyVSgUAUKlUSE5OhlarFaWNiIjMz9bcC9Tr9di1axeCgoKgVqvh7u4OqVQKAJBKpXBzc4NarYYgCJXeJpfLzb26RERvPbMHzZIlS1CzZk2MGDECycnJ5l68yVxcalu6BKvl6upo6RKqFfYnWTuzBk1MTAxu376NTZs2wcbGBgqFAhkZGdDpdJBKpdDpdMjMzIRCoYAgCJXeVh4azWPo9YJIPSGOqvKB9OBBrqVLeGNVpS+B6tGfVP3Z2EhK3UA32+HNcXFxuHz5MtavXw+ZTAYAcHFxgVKpREJCAgAgISEBSqUScrlclDYiIjI/iSAIom+237hxAyqVCk2aNEGNGjUAAI0aNcL69euRkpKC8PBw5OTkwMnJCTExMWjWrBkAiNJmKmvdo/llxXiL1uA7d0u12AJ3dXVE4qixli4Dodu3Vov+pOqvrD0aswSNNWLQVAyDpnIxaMhaVImhMyIiejsxaIiISFQMGiIiEhWDhoiIRMWgISIiUTFoiIhIVAwaIiISFYOGiIhExaAhIiJRMWiIiEhUDBoiIhIVg4aIiETFoCEiIlGZHDRff/11idO3bt1aacUQEVH1Y3LQrF+/vsTpGzdurLRiiIio+nntpZzPnj0LANDr9Th37hxevnzNvXv3UKtWLfGqIyIiq/faoFm4cCEAIC8vDwsWLDBMl0gkcHV1RUREhHjVERGR1Xtt0Jw6dQoAMHfuXKxYsUL0goiIqHp5bdAUeTlk9Hq
9UZuNDQ9eIyKikpkcNFeuXEFUVBSuX7+OvLw8AIAgCJBIJLh69apoBRIRkXUzOWjCw8PRrVs3LFu2DDVq1BCzJiIiqkZMDpr79+/js88+g0QiEbMeIiKqZkz+ciU4OBg//fSTmLUQEVE1ZPIeTV5eHqZOnQpfX1/Uq1fPqI1HoxERUWlMDprmzZujefPmYtZCRETVkMlBM3XqVDHrICKiasrkoCk6FU1JOnbsWCnFEBFR9WNy0BSdiqZIVlYWCgoK4O7ujpMnT5Z535iYGBw7dgz379/HoUOH4OnpCQAICgqCTCaDvb09AGDOnDno0qULACA1NRXh4eHIzs6Gs7MzYmJi0KRJkzdqIyIi8zM5aIpORVNEp9Nh48aNJp1Us3v37hg1ahSGDx9erG3NmjWG4HlZZGQkwsLC0L9/f8THx2PRokXYvn37G7UREZH5VfjcMVKpFJMmTcKWLVteO6+fnx8UCoXJj63RaJCcnAyVSgUAUKlUSE5OhlarrXAbERFZhsl7NCU5c+bMG/+Ac86cORAEAb6+vpg1axacnJygVqvh7u4OqVQK4EWoubm5Qa1WQxCECrXJ5fJy1eXiUvuN1utt5urqaOkSqhX2J1k7k4Oma9euRqHy7Nkz5OfnIzIyssIL/+6776BQKJCfn4/o6GhERUUhNja2wo9XmTSax9DrhdfPWIVUlQ+kBw9yLV3CG6sqfQlUj/6k6s/GRlLqBrrJQfPFF18Y3XZwcEDTpk1Ru3bFt/yLhtNkMhnCwsIwefJkw/SMjAzodDpIpVLodDpkZmZCoVBAEIQKtRERkWWY/B2Nv78//P394efnhyZNmqBly5ZvFDJPnz5Fbu6LLTVBEJCYmAilUgkAcHFxgVKpREJCAgAgISEBSqUScrm8wm1ERGQZEuHlazOX4fHjx4iKikJiYiIKCwtha2uLPn36ICIiAo6OZQ8zLF26FElJSXj48CHq1q0LZ2dnbNq0CdOmTYNOp4Ner4eHhwciIiLg5uYGAEhJSUF4eDhycnLg5OSEmJgYNGvW7I3aysNah85+WTHeojX4zt1SLYZ6XF0dkThqrKXLQOj2rdWiP6n6K2vozOSgCQ8Px5MnTzBr1iw0bNgQ9+/fR1xcHBwcHBATE1OpBVcFDJqKYdBULgYNWYtK+Y7mxx9/xIkTJ+Dg4AAAaNq0KZYvX47g4ODKqZKIiKolk7+jsbe3L/Z7lKysLMhkskovioiIqg+T92gGDx6McePGYcyYMWjQoAHS0tKwbds2fPjhh2LWR0REVs7koJk8eTLc3d1x6NAhZGZmws3NDePHj2fQEBFRmUweOouOjkbTpk2xbds2JCYmYtu2bfDw8EB0dLSY9RERkZUzOWgSEhLQqlUro2mtWrUy/GaFiIioJCYHjUQigV6vN5pW9BsYIiKi0pgcNH5+fli9erUhWPR6PdauXQs/Pz/RiiMiIutXrgufffLJJwgMDESDBg2gVqvh6uqKTZs2iVkfERFZOZODpn79+ti/fz8uXboEtVoNhUIBb29v2NhU+JI2RET0FijX9WhsbGzg4+MDHx8fseohIqJqhrsjREQkKgYNERGJikFDRESiYtAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCoGDRERiYpBQ0REomLQEBGRqBg0REQkKgYNERGJikFDRESiMkvQxMTEICgoCF5eXvi///s/w/TU1FQMGTIEvXr1wpAhQ3Dr1i1R24iIyPzMEjTdu3fHd999h4YNGxpNj4yMRFhYGI4dO4awsDAsWrRI1DYiIjI/swSNn58fFAqF0TSNRoPk5GSoVCoAgEqlQnJyMrRarShtRERkGeW6lHNlUqvVcHd3h1QqBQBIpVK4ublBrVZDEIRKb5PL5eWqz8WldiWu7dvF1dXR0iVUK+xPsnYWC5qqTqN5DL1esHQZ5VJVPpAePMi1dAlvrKr0JVA9+pOqPxsbSakb6BYLGoVCgYyMDOh0OkilUuh0OmR
mZkKhUEAQhEpvIyIiy7DY4c0uLi5QKpVISEgAACQkJECpVEIul4vSRkREliERBEH08aGlS5ciKSkJDx8+RN26deHs7IzDhw8jJSUF4eHhyMnJgZOTE2JiYtCsWTMAEKWtPKx16OyXFeMtWoPv3C3VYqjH1dURiaPGWroMhG7fWi36k6q/sobOzBI01ohBUzEMmsrFoCFrUVbQ8MwAREQkKgYNERGJikFDRESiYtAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCoGDRERiYpBQ0REomLQEBGRqBg0REQkKgYNERGJikFDRESiYtAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJCoGDRERiYpBQ0REomLQEBGRqBg0REQkKgYNERGJytbSBQBAUFAQZDIZ7O3tAQBz5sxBly5dkJqaivDwcGRnZ8PZ2RkxMTFo0qQJAFS4jYiIzKvK7NGsWbMG8fHxiI+PR5cuXQAAkZGRCAsLw7FjxxAWFoZFixYZ5q9oGxERmVeVCZpXaTQaJCcnQ6VSAQBUKhWSk5Oh1Wor3EZEROZXJYbOgBfDZYIgwNfXF7NmzYJarYa7uzukUikAQCqVws3NDWq1GoIgVKhNLpdbbP2IiN5WVSJovvvuOygUCuTn5yM6OhpRUVEYM2aMRWtycalt0eVbM1dXR0uXUK2wP8naVYmgUSgUAACZTIawsDBMnjwZ8+fPR0ZGBnQ6HaRSKXQ6HTIzM6FQKCAIQoXaykOjeQy9XhBjdUVTVT6QHjzItXQJb6yq9CVQPfqTqj8bG0mpG+gW/47m6dOnyM198UYSBAGJiYlQKpVwcXGBUqlEQkICACAhIQFKpRJyubzCbUREZH4W36PRaDSYNm0adDod9Ho9PDw8EBkZCQBYvHgxwsPDsWHDBjg5OSEmJsZwv4q2ERGReUkEQbCu8SEzsdahs19WjLdoDb5zt1SLoR5XV0ckjhpr6TIQun1rtehPqv6q9NAZERFVbwwaIiISFYOGiIhExaAhIiJRMWiIiEhUDBoiIhIVg4aIiETFoCEiIlExaIiISFQMGiIiEhWDhoiIRGXxk2paA0enGqhhb2fRGp7nFSA357lFayAiqggGjQlq2NshbJ/nB7cAABDQSURBVO53Fq1h54rhyAWDhoisD4fOiIhIVAwaIiISFYOGiIhExaAhIiJRMWiIiEhUDBoiIhIVg4aIiETF39EQkejqOMkgs7e3dBnIz8vDo5x8S5fx1mHQEJHoZPb2+Nv8TyxdBmYt3wyAQWNuHDojIiJRMWiIiEhUDBoiIhIVg4aIiETFoCEiIlFV26BJTU3FkCFD0KtXLwwZMgS3bt2ydElERG+lahs0kZGRCAsLw7FjxxAWFoZFixZZuiQiordStfwdjUajQXJyMrZu3QoAUKlUWLJkCbRaLeRyuUmPYWMjMbpdr26tSq+zvF6tqSQyJxczVFI2U+q0Bg71LN+XQPXpTydn9mdlcXS0h0xm2av+5ucXIDc3z3C7rH6VCIIgmKMoc7p8+TLmzZuHw4cPG6aFhobiiy++QMuWLS1YGRHR26faDp0REVHVUC2DRqFQICMjAzqdDgCg0+mQmZkJhUJh4cqIiN4+1TJoXFxcoFQqkZCQAABISEiAUqk0+fsZIiKqPNXyOxoASElJQXh4OHJycuDk5ISYmBg0a9bM0mUREb11qm3QEBFR1VAth86IiKjqYNAQEZGoGDRERCQqBg0REYmKQUNERKJi0BARkagYNEREJKpqefZmMk1aWhqOHj0KtVoN4MWpe3r27IlGjRpZuDJjrLNyWUudVH1IFy9evNjSRVRHaWlp2LNnDxITE/Hjjz/i5s2bcHV1hZOTk6VLAwDs2bMH8+bNg6urK+rXr4/atWsjLS0NK1euRK1atarMWa5ZZ+WyljqBqv8eKsI6X49nBhDBnj17sG7dOvTo0cNwIk+1Wo2TJ09iypQp+PDDDy1cIdCrVy/s2rW
r2PnftFothg4diqSkJAtVZox1Vi5rqdMa3kMA6zSZQJWuZ8+egkajKTZdo9EIwcHBFqiouB49epQ4Xa/Xl9pmCayzcllLndbwHhIE1mkqfkcjAr1eX+KZouvWrQuhiuxABgYGYvz48fjoo4/QoEEDAC92rXfv3o3OnTtbuLr/YJ2Vy1rqtIb3EMA6TcWhMxH85S9/wd27d0t8Mzdq1AhV4WsxvV6PgwcP4siRI0hLSwMANGjQACEhIejfvz9sbKrGAYmss3JZS53W8B4CWKepGDQisJY3M1FVZS3vIdZpGgYNFXPlypUqdfRRaVhn5bKWOsn6VI24fYtcuXLF0iW81urVqy1dgklYZ+Wyljqt4T0EsM6XMWjMrCq/mf/5z38CAL788ksLV1K6J0+e4MqVK3j8+HGVrvNlVbnOZ8+e4fLly8jJyanSdb6sKr+HXsY6/4NDZ2+pmzdvFpv28ccf47//+78hCAKaN29ugaqKW7RoEWbOnAm5XI5ffvkF06ZNQ926daHVavHFF18gMDDQ0iUCAAICAtC3b1988MEHUCqVli6nVMePH8e8efPg5uaGmJgYzJw5Ew4ODtBoNFi+fDmCgoIsXSJVQzy82cz69u2LQ4cOWboMqFQqw9EnRR4+fIgJEyZAIpHg5MmTFqrM2MWLFw2HZa5evRqbNm2Ct7c3UlNTMXv27CoTNLVq1YKNjQ3GjRuH+vXr44MPPkDfvn1Rp04dS5dmZN26ddi1axdycnIwceJEbNy4Ee3atUNKSgpmz55d5YImKysL6enpAID69eujbt26Fq6IKoJBI4KS9haKZGVlmbGS0k2dOhW///47Fi9ejIYNGwIAgoKCcOrUKQtXZiwvL8/w/5MnT+Dt7Q0AaNq0KQoKCixVVjF16tTBggUL8Pnnn+PkyZPYt28fVq5ciffffx+DBw+uMr9RkUgk8PLyAvAiHNu1awcA8PDwsGRZxdy5cwf/9V//heTkZLi5uQEAMjMz8e677+Ivf/kLmjRpYtkCTVBVNiqBF587sbGxUKvV6N69O4YPH25omzZtGtauXSvq8hk0IlCpVGjYsGGJP4TKzs62QEXFTZ06FcnJyZg9ezb69++PYcOGQSKRWLqsYjp27Ii//vWvmDFjBgICApCYmIjQ0FCcOXMGzs7Oli6vGDs7O4SEhCAkJASZmZnYt28flixZgqNHj1q6NAAvgiYlJQU5OTl4+vQpLl68CB8fH6SmpkKn01m6PIO5c+ciLCwMW7duNRx6q9frcejQIcybNw//+Mc/LFzhC9awUQkAkZGRaNSoEbp27Ypdu3bh7NmzWLVqFWxtbXH37l3Rl8/vaETQvXt37Ny5E+7u7sXaunbtih9++MECVZUsPz8fa9aswR9//IHU1FScPn3a0iUZyc/Px4oVKxAfHw9nZ2fcvXsXtra2CAgIwOLFi9G4cWNLlwgAGDBgAA4cOGDpMl7rf/7nfzBv3jzY2NggLi4OX375JR48eID09HQsXrwYKpXK0iUCAEJCQkoN57LazK1FixalblRmZmbi8uXLFqiquP79+yM+Ph4AIAgCoqKicOfOHWzYsAFDhgwR/bXLPRoR9OzZE/fv3y8xaIKDgy1QUelkMhnmzJmDixcv4sKFC5YupxiZTIaIiAjMmjULd+7cgU6nQ4MGDarcWP369estXYJJunXrZvQ8+/v74+rVq6hfvz7q1atnwcqMOTs7IyEhAX369DHsaQuCgEOHDlWpsyI3bNiwzI3KqiI/P9/wv0QiQWRkJGJiYjBx4kSj4WmxcI+GiKqcW7duITIyElevXjV8iGdkZKBFixZYvHgxmjVrZuEKX4iJiUFwcLDhu66XLV26FBERERaoqriJEydiwoQJaN++vdH0uLg4bN68GdeuXRN1+QwaIqqytFqt0QXaSjoxJL1ednY2JBJJiUdB3rx5U/SfM/AHm0RUZcnlcrRs2RItW7Y0hEzfvn0tXJVpqlKdzs7OpR5q/9lnn4m+fH5
HQ0RVTmlHcwmCUKWO5rKWo84sXSeDhoiqHGv4iQDAOk3FoCGiKsdajuZinabhdzREVOUU/USgJFXpJwKs0zQ86oyIiETFPRoiIhIVg4aIiETFoCGrExQUZLhIW3ksWrTIcKqY8+fP47333it13vDwcMTFxQEA/vWvf6FXr14VK9aMXu6XTZs2YeHChRau6PUOHjyIcePGldo+cuRI7Nmzx4wVkRh41Bm9NaKioip0Pz8/Pxw7dqySqxHXpEmTDP/fu3cP3bt3x5UrV2Br+/q3/Pnz5/H555+b5QSr/fr1Q79+/URfDlkW92iIiEhUDBqySn/88QdCQ0PRvn17zJ8/H3l5edi3bx+GDRtmNJ+Xlxdu374NwHg47FXJyckYOHAg2rZti5kzZxqd0fbVYbagoCB8/fXX6Nu3L3x9fYvN/9VXXyEwMBCBgYHYs2ePUQ1ZWVmYNGkS2rVrh8GDB2PVqlWGmu/duwcvLy8UFhYaHuvloaM7d+5g1KhRCAgIQEBAAGbPno2cnJwS12ft2rWYM2cOAGDEiBEAgPbt26Nt27a4cOEC/P39cf36dcP8Go0G3t7euH//PiZMmIDMzEy0bdsWbdu2RUZGBtq0aWP0C/LLly+jQ4cOKCgowL59+zB06FAsWbIEvr6+CAkJwdmzZw3z5ubmYsGCBQgMDESXLl0QFxdnuPbNq8/ZmTNnEBISAl9fX0RFRZX4A0OyPgwaskqHDh3C119/jePHjyM1NRUbNmyo8GPl5+djypQp6N+/Py5cuICQkBAkJSWVeZ8jR45gy5YtOHnyJK5fv459+/YBAE6fPo1t27Zh69atOH78eLFLL0RFRcHe3h4//fQTli1bhu+//97kOgVBwCeffIIff/wRR44cQXp6uklXRtyxYwcA4Oeff8Zvv/0Gf39/hIaG4uDBg4Z5EhIS0KlTJzRs2BBfffUV3Nzc8Ntvv+G3336Du7s7/P39ceTIEcP8Bw8eRJ8+fWBnZwcAuHTpEho3boxz585h+vTpmDp1quEX5/PmzYOtrS2SkpJw4MABnDlzpsTvXbRaLaZNm4aZM2fi3Llz+NOf/oRff/3V5P6hqotBQ1Zp+PDhUCgUcHZ2xuTJk3H48OEKP9bvv/+OgoICjB492nCFzNatW5d5n5EjR8Ld3R3Ozs7o1q0brl69CuBFAA0aNAh//vOf4eDggKlTpxruo9PpkJSUhOnTp6NmzZrw9PTEwIEDTa7znXfeQefOnSGTySCXyzF27Fj8/PPPFVrngQMHIiEhAXq9HgAQHx9f5nclAwcONASTTqfD4cOH0b9/f0O7XC439F9oaCiaNm2K//3f/8XDhw9x+vRpLFiwADVr1oSLiwvGjBlT4vN1+vRpNG/eHCEhIbCzs8Po0aOr1DVyqOJ4MABZJYVCYfi/QYMGyMzMrPBjZWZmwt3d3ehS1g0aNCjzPq6urob/HRwcDMvPzMxEq1atSqxTq9WisLCwWO2m0mg0WLp0Kf71r3/hyZMnEAShwhcBa9OmDRwcHHDhwgW4urrizp076N69e6nzd+/eHZGRkbh79y5SU1NRu3ZteHt7G9pL6r/MzEykpaWhsLAQgYGBhja9Xm/UB0UyMzNRv359w22JRFLifGR9GDRklYquUQIAaWlpcHNzg4ODA54/f26Y/uDBA5Mey9XVFRkZGRAEwfBhmZaWVqHLRLu5uSEjI6PEOuVyOWxtbaFWq+Hh4VGsvWbNmgCA58+fo3bt2sXWYeXKlZBIJDh48CDq1q2LEydOmHQk3csB8LKivRRXV1f06tUL9vb2pc5vb2+P3r174+DBg/j3v/9ttDcDoFj/qdVqBAUFoX79+pDJZDh37txrj3hzdXVFenq64bYgCEb9Q9aLQ2dklXbu3In09HRkZ2dj8+bNCA0NRYsWLXDjxg1cvXoVeXl5Jn1/AQA+Pj6wtbXF9u3bUVhYiKSkJPzxxx8VqiskJAT79u1DSkoKnj17ZnSJZ6lUiuDgYKxbtw7
Pnj3DzZs3sX//fkO7XC6Hu7s74uPjodPpsHfvXty9e9fQ/uTJE9SsWRNOTk7IyMjAli1bTKpJLpfDxsbG6LGAF9eRP3HiBA4ePIgBAwYYpru4uCA7Oxu5ubnF5t+/fz9OnTpVbJhNq9Vi+/btKCgowJEjR5CSkoKuXbvCzc0NnTt3xl//+lc8fvwYer0ed+7cKfGy4V27dsWNGzeQlJSEwsJCbN++HQ8fPjRpHalqY9CQVVKpVBg3bhx69OiBxo0bY/LkyWjatCmmTJmCMWPGoGfPnvD19TXpsWQyGdauXYv9+/ejffv2SExMrPCJBrt27YqRI0di1KhRCA4Oho+Pj2EZwIsfjT59+hSdO3dGeHg4Bg0aZHT/JUuW4Ouvv0ZAQABu3ryJtm3bGtqmTp2K5ORk+Pn5YeLEiejZs6dJNTk4OGDSpEkYNmwY/Pz8cPHiRQBA/fr18e6770IikcDPz88wv4eHB/r06YMePXrAz8/PsIfm6+sLGxsbtGzZEo0aNTJahre3N27fvo0OHTpg1apVWLNmDerWrQsAWLFiBQoKCgxHCU6fPr3EvU25XI7Vq1dj5cqVCAgIwO3bt0u8RDJZH55Uk0hEKSkpUKlU+OOPP0ocOtq3bx/27NmDXbt2WaA6YP78+XBzczP5KoujRo1C37598eGHHxqmWXodqOrjHg1RJTt+/Djy8/Px6NEjfPHFF+jWrZtJv8g3t3v37uH48eMYPHiwSfNfunQJycnJ6N27t8iVUXXDoCGqZH//+9/RsWNHBAcHQyqVYvHixZYuqZhVq1ahb9+++Pjjj0066GHevHkYO3YsFixYYDhQgchUHDojIiJRcY+GiIhExaAhIiJRMWiIiEhUDBoiIhIVg4aIiET1/wDlbaau8oEbuQAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# seaborn defaults; `color` holds the default palette (not used in this cell)\n",
+    "color = sns.color_palette()\n",
+    "sns.set(style=\"darkgrid\")\n",
+    "# convert dataframe to pandas for ease of use with sns\n",
+    "pd_train = df_train.to_pandas()\n",
+    "# one bar per building quality code, bar height = number of rows with that code\n",
+    "ax = sns.countplot(x=\"buildingqualitytypeid\", data=pd_train)\n",
+    "# adjust fringe aesthetics; the title names the column that is actually plotted\n",
+    "plt.xticks(rotation='vertical')\n",
+    "plt.title(\"Frequency of Building quality type\", fontsize=15)\n",
+    "# display the graph\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 274
+    },
+    "colab_type": "code",
+    "id": "KOHPCFRSp5y9",
+    "outputId": "e0f3fe2e-a82a-49e8-a798-a3f79a30bcee"
+   },
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD7CAYAAACCEpQdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAZbklEQVR4nO3dfXBU9aHG8Wc360YC7CS7hLcasc5tuHFGpBMHGOrwErxkpgVtYaZYedERday1tfY6aumLVqiYlltkCt7QXm4ptUP/IUMrzC04g+JFilVrUVpqbEBgRhLYTbwBxITNnvsHbMzL7mbfzzn5fT9/hXP2/M5zTvY8uzl72OOxLMsSAMAoXrsDAACKj/IHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABvLZHaCj44JiMUuh0ChFIuftjpMVstuD7MXn1tzS8Mnu9XpUUTEy5zFtL/9YzFIsZvX+7FZktwfZi8+tuSWy98VpHwAwEOUPAAai/AHAQEOWf0NDg+rq6jR58mQ1NzdLkjo6OnTfffepvr5eCxcu1EMPPaT29vaChwUA5MeQH/jOmzdPK1as0NKlS3uneTwe3XvvvZo+fbqkyy8Q69at0zPPPFO4pCi4zkMHFW7aoWh7RPJ6pVhMvmBIYxYtVmDGzMGPifN6FZg1W+OX3ZVyzIFjZZstPk7lwvqstzUX6W5Twn11RXOCceNjffTa/+qTo0cTzhtqPb5gSGVTpujjd97pl09S0t9t33nxaR//8311vrpfisUG/X5bX/j1p/MG8nikgV8UfGV93lGjZFmWrAsXeqdlzOvV1ZMnK9p25vK2lJZK3d391plsP7ZOuVHdl6KD9m1eDNzuFMeEU3jS/Urnuro6NTY2qrq6etC8PXv2aPv27dq6dWvGASKR84rFLFVWjtbZs+cyXt4JhkP2zkMH1bZtq6zu7kGP8fj9GrfibklK+hhJCsyZ2+/JnmjM+FiZvAAkG+dfHvq6PDd8Pu1x8iHdbUq1P1NKVJ65rqekRJJH6ommNy9JhsCcuRoxwq+2/9mT5sbYKMV+LKaBx0S2+naM1+tRKDQq5zFzPucfi8W0fft21dXV5RwG9gk37UhaIFZ3t8JNO1I+RtLld4NDjBkfK9dsVne3Tv7mtxmNkw/pbtNQ+yqpFIWV9Xp6ehIXf7J5STJ0vrpfbXteSr0up3BA8UuDjwknyfk6/9WrV6usrEzLli3Lavm+r2CVlaNzjWMbt2dv7kj9mU10iPmSpFis335INma0oz2j/ZVsnK5wpOj7Pd1tGmp/ZqtY60kom9M0phtwTOQi38/1nMq/oaFBJ06cUGNjo7ze7P6I4LSPveLZfRXBhOem43wVQUlK+Rh5vf32Q7IxfRXBjPZXsnFKx4SKvt/T3aah9mcu6y/GehKKH+O8CKRvwDGRLUed9lm/fr2OHDmiTZs2ye/35xwE9hqzaLE8SX6PHr9fYxYtTvkYSQrMmj3kmPGxcs3m8ft17fKlSZYonHS3aah9lZTHk3xWtuspKZFKkrzPSzQvSYbArNkaV/9vqdflFCn2YzENPCacpOSpp556KtUD1qxZo+9///s6c+aM9uzZo6amJk2bNk3f+c53dNVVV2nnzp363e9+pwMHDuhLX/pSxgEuXuyWZUkjR5bq44+zOEfqAMMhe+k1VboqFNInH3yg2MWLl9/lWZZ8wZDG3nGnAjNmDn5MnNerwOw5gz7YGvj4vmNlItk4VfPnFX2/p7tNSfdVCr5gSGOXLlP04seKhsOD56WxHl8wpFHTZ6in89yn+e5cqlGf/3zi3+2AefEM3kBAXSdPXj533uf3WzV7pv6v9eyn8wZKVLpX1ucdNUq66irp0qXeaRnzenX1v/6rFLMub0tp6aC/RJLtx8CUG+UJhgbt27wYuN1Jjols9e0Yj8ejsrLc33CnfbVPoXDax15kt4dbs7s1tzR8stt+2gc
A4F6UPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAlD8AGCjnr3QGgGzk6y5vyA7lD6DoBt6BLNoeUdu2rZLEC0CRcNoHQNHl6y5vyB7lD6Dokt2Apmg3pgHlD6D4fMFQRtORf5Q/gKLL113ekD0+8AVQdPEPdbnaxz6UPwBbBGbMpOxtxGkfADAQ5Q8ABqL8AcBAlD8AGIjyBwADUf4AYCDKHwAMRPkDgIEofwAw0JDl39DQoLq6Ok2ePFnNzc29048fP64lS5aovr5eS5Ys0QcffFDInACAPBry6x3mzZunFStWaOnSpf2mP/nkk7rzzjt1++236/e//71++MMfatu2bQULarfWF36tzlf3S7GY5PUqMGu2yv7lcwo37VBze0TyeqVYrKDfUdL3zke9PB7J75e6utLKMHCM5kGPKJ54ztZf/0q6dCnj5ZNlv7qmRtf+++ODpp/8jwZ9cvTopxNGjJAuXsx4vflg537PhVtzSw7LPmKEqn/+n7ZG8FiWZaXzwLq6OjU2Nqq6ulqRSET19fV6/fXXVVJSop6eHk2fPl179+5VMBjMKEAkcl6xmKXKytE6e/ZcVhtRaK0v/Fqdr7w8eMaVsh3I4/dr3Iq78/oCMPDOR0NJlCHTMdxs4AvAoOIH7JbBC0DffvR6PQqFRuW8+qzO+Z8+fVrjxo1TSUmJJKmkpERjx47V6dOncw7kRJ2v7k88I0HxS4W5I1GiOx+lkihDpmO42cCip/jhODb91Rln+7d69n0Fq6wcbWOS5JqTlHwq0Y72vG5Pc0d7zhmyGcPN+m27jTmAZDLpiHz3Y1blP2HCBLW1tamnp6f3tM+ZM2c0YcKEjMdyw2mfZKd3UvFVBPO6Pb6KYMa3uBuYIZsx3MyxzyfginSfo4457RMKhVRTU6Ndu3ZJknbt2qWampqMz/e7RWDW7MQzvIl3XyHuSJTozkepJMqQ6RhudnVNTcp/A7YbMcLW1Q/5ge+aNWu0d+9ehcNhVVRUqLy8XLt371ZLS4ueeOIJdXZ2KhAIqKGhQddff33GAVzxzl+pr/aJuvhqHzvlerVPMm642geGy/Bqn0K880/7ap9CcUv5p0J2e5C9+NyaWxo+2W097QMAcDfKHwAMRPkDgIEofwAwEOUPAAai/AHAQJQ/ABiI8gcAA1H+AGAgyh8ADET5A4CBKH8AMBDlDwAGovwBwECUPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAlD8AGIjyBwADUf4AYCDKHwAMRPkDgIF8uQ7w8ssva8OGDbIsS7FYTN/85jc1f/78fGQDABRITuVvWZYee+wx/fa3v1V1dbX+8Y9/6Gtf+5puvfVWeb38UWGKzkMHFW7aoWh7RL5gSGMWLVZgxky7Y2VlOG2Lmwyn/Z5oWyQ5bvtyfufv9Xp17tw5SdK5c+c0duxYit8gnYcOqm3bVlnd3ZKkaHtEbdu2SpLtT+5MDadtcZPhtN8TbUvrr7ZI8kg90d5pTti+nFra4/Houeee04MPPqi5c+fqG9/4hp599tl8ZYMLhJt29D7R46zuboWbdtiUKHvDaVvcZDjt90Tbop6e3uKPc8L25fTOPxqNavPmzXr++edVW1urt956S4888oh2796tkSNHpjVGKDSq9+fKytG5xLGVqdmbO9oTTo92tBdln+RzHcXeFrc+Z/Kdu5j7vdD7PNm2JJLp9uU7e07lf/ToUZ05c0a1tbWSpNraWo0YMUItLS2aMmVKWmNEIucVi1mqrByts2fP5RLHNiZn91UEFW2PJJxe6H2S7/1ezG1x63OmELmLtd+Lsc+TbUuyx6abp292r9fT701ztnI67TN+/Hi1trbq2LFjkqSWlhaFw2Fde+21OQeDO4xZtFgev7/fNI/f3/shl5sMp21xk+G03xNti0pKpJL+77OdsH05vfOvrKzUU089pYcfflgej0eStHbtWpWXl+clHJwv/oGV065
kyMZw2hY3GU77Pdm2JJpm9/Z5LMuy7AzAaR97kd0ebs3u1tzS8MnuiNM+AAB3ovwBwECUPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAOd/Mpdiav/l16eLFftPi35Xx8T/fV+er+6VYTPJ6FZg1W52vvFz4TAVfQ+GQ3R5uze7W3JKN2T0eybIkr1eKxfhun7hMvtsnUfH3iu9gAHA4j9+vcSvuTvsFgO/2SVb8EsUPwDWccCcvd5U/AAwT6d70pVAofwCwgS8YsnX97ir/ESOSz7tyMxkAcDon3MnLVeVf/fP/TPgC4AuGNH7lfQrMmXv5E3Xp8tU+c+YWOSEADBB/Y3qlm3zBUEYf9haKq672cSqy24PsxefW3NLwyW7m1T4AgLyg/AHAQJQ/ABiI8gcAA1H+AGAgyh8ADET5A4CBKH8AMBDlDwAGyvlmLl1dXXrmmWf0pz/9SaWlpZo6dapWr16dj2wAgALJufx/+tOfqrS0VHv27JHH41E4HM5HLgBwvc5DBxVu2qFoe8Qxd/CKy6n8L1y4oJ07d2r//v3yXPnyojFjxuQlGAC4Weehg2rbtlVWd7eky9/f37ZtqyQ54gUgp3P+p06dUnl5uTZu3KhFixZp+fLlevPNN/OVDQBcK9y0o7f445xwB6+4nN75R6NRnTp1SjfccIMef/xxHT58WA888IBeeukljRqV3rfO9f12usrK0bnEsRXZ7UH24nNrbqm42Zs72hNOj3a0Z5Uj39lzKv+JEyfK5/NpwYIFkqSbbrpJFRUVOn78uG688ca0xuArne1Fdnu4Nbtbc0vFz+6rCCa8VaOvIphxDsd9pXMwGNT06dP12muvSZKOHz+uSCSiSZMm5RwMANxszKLF8vj9/aY54Q5ecTlf7fOjH/1Iq1atUkNDg3w+n37yk58oEAjkIxsAuFb8Q91hebWPJFVVVek3v/lNPrIAwLASmDHTMWU/EP/DFwAMRPkDgIEofwAwEOUPAAai/AHAQJQ/ABiI8gcAA1H+AGAgyh8ADET5A4CBKH8AMBDlDwAGovwBwECUPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAlD8AGIjyBwADUf4AYCDKHwAMRPkDgIEofwAwUN7Kf+PGjZo8ebKam5vzNSQAoEB8+Rjkb3/7m/76179q4sSJ+RguY52HDirctEPR9oh8wZDGLFqswIyZQ87LdsyT/9GgT44e7X1sv5c7j0e66iqpu1vyeqVYrHd5Sb1jxufZzc0v1WQvPrfmlpybvfq/ttqy3pzLv7u7W08//bTWrVunu+66Kx+ZMtJ56KDatm2V1d0tSYq2R9S2bWvv/GTzUr0ApBrzo9f+t1/xD2JZl4tf6i33aHtErb/6b0mW1NPTbx4AszXfe7ctLwA5l/+GDRt02223qaqqKh95MhZu2tFb0nFWd7fCTTt6f040L1X5pxoz2h7JLmhPNLvlAKAAcir/t99+W++++64effTRrMcIhUb1/lxZOTrj5Zs72hNOjyaZHp+Xal3ZjAkA2Uqn+7Lpx1RyKv833nhDx44d07x58yRJra2tWrlypdauXatbbrklrTEikfOKxSxVVo7W2bPnMs7gqwgmfDfuqwhKUtJ5qdaVasys3/kDQBJDdV/ffvR6Pf3eNGcrp6t97r//fh04cED79u3Tvn37NH78eG3ZsiXt4s+HMYsWy+P395vm8fs1ZtHilPOyHfPqmprsgpb4pJKS7JYFgDzLy9U+doqfu091RU+mV/ukGjMwY+agq336cdnVPgDsZdfVPh7Lsixb1nxFrqd9nIDs9iB78bk1tzR8sjvitA8AwJ0ofwAwEOUPAAai/AHAQJQ/ABiI8gcAA1H+AGAgyh8ADET5A4CBKH8AMBDlDwAGovwBwECUPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAlD8AGIjyBwADUf4AYCDKHwAMRPk
DgIEofwAwEOUPAAby5bJwR0eHHnvsMZ08eVJ+v1+TJk3S008/rWAwmK98AIACyOmdv8fj0b333qs9e/boxRdfVFVVldatW5evbACAAsmp/MvLyzV9+vTef0+dOlUffvhhzqEAAIXlsSzLysdAsVhM99xzj+rq6rRixYp8DAkAKJCczvn3tXr1apWVlWnZsmUZLReJnFcsZqmycrTOnj2XrzhFRXZ7kL343JpbGj7ZvV6PQqFROY+Zl/JvaGjQiRMn1NjYKK+XC4gAwOlyLv/169fryJEj+sUvfiG/35+PTACAAsup/N9//301Njbquuuu0x133CFJuuaaa7Rp06a8hAMAFEZO5f+5z31O7733Xr6yAACKhBP0AGAgyh8ADET5A4CBKH8AMBDlDwAGovwBwECUPwAYiPIHAANR/gBgoLx9qycu6zx0UOGmHYq2R+QLhjRm0WIFZszMeLmyKVP08TvvJByn72O9o0bJsixZFy4MuVyi9TW3RySvV4rF8r4vCq05yXRPaamsri75giH5xo3VJ++957jtS5bd6dyaW3Jgdq9XgVmzNX7ZXbasPm/f55+t4fSVzp2HDqpt21ZZ3d298zx+v8atuDvlC0Ci5QaKjyNpyMcmWq7v+tNZH4DiCMyZO+QLQCG+0pnTPnkUbtoxqFCt7m6Fm3ZkvNxA8XHSeexQ6890DACF0/nqflvWy2mfPIq2RzKanu78TB831HLZjgOgAGw6Jck7/zzyBUMZTU93ft/HpfvYVONnMwaAArHpBliUfx6NWbRYngE3tPH4/RqzaHHGyw0UHyedxw61/kzHAFA4gVmzbVkvp33yKP6haqZX+yRabqirdnK52mfg+tx6tU8ybrjaB+Bqn2F0tY8bkd0ebs3u1tzS8MnO1T4AgKxR/gBgIMofAAxE+QOAgWy/2sfr9ST82W3Ibg+yF59bc0vDI3u+tsH2q30AAMXHaR8AMBDlDwAGovwBwECUPwAYiPIHAANR/gBgIMofAAxE+QOAgSh/ADBQQcq/oaFBdXV1mjx5spqbm3unv/zyy/ryl7+s22+/XQsXLtTevXvTmnf8+HEtWbJE9fX1WrJkiT744INCxE6Z/ZVXXtFXvvIVLVy4UMuWLdOpU6fSyufk7B0dHbrvvvtUX1+vhQsX6qGHHlJ7e7srsve1cePGQcs5PXtXV5eefPJJzZ8/XwsXLtQPfvAD12R3wrGa6rmb7fHo5OwFOVatAnjjjTesDz/80Jo7d6713nvvWZZlWbFYzLr55pt7/3306FFr6tSpVk9PT8p5lmVZy5cvt3bu3GlZlmXt3LnTWr58eSFiJ83+0UcfWdOmTbOOHTvWm+Gee+7pXSZVPidn7+josA4dOtS7/LPPPmt997vfdUX2uCNHjlgrV6605syZ07ucG7KvXr3a+vGPf2zFYjHLsizr7NmzrsjulGM11XM32+PRydkLcawWpPzjBpb/tGnTrDfffNOyLMv685//bM2fP3/IeeFw2KqtrbWi0ahlWZYVjUat2tpaKxKJFDJ6v+yHDx+2vvjFL/bO6+josKqrq61IJJIyn9OzD/THP/7RuuuuuyzLcv5+tyzL6urqsr761a9aJ0+e7Lec07OfP3/eqq2ttc6fPz9oDKdnd+KxalmfPnezPR6dnj3ZMpaV/X4v2rd6ejwePffcc3rwwQdVVlamCxcuaPPmzUPOO336tMaNG6eSkhJJUklJicaOHavTp08rGAwWJftnP/tZhcNhvfPOO5oyZYpefPHF3myWZSXNl2qeE7L3zRCLxbR9+3bV1dX1znd69g0bNui2225TVVVVv+Wcnr2kpETl5eXauHGjXn/9dY0cOVIPP/ywbr75ZsdnDwaDjjtW+z53U2Vw4rGabvZCHKtFK/9oNKrNmzfr+eefV21trd566y098sgj2r17t0pLS5POc4LRo0dr/fr1Wrt2rbq6ujRr1iwFAgH5fD5dunTJ7ngppcre1+rVq1VWVqZly5bZlHS
wVNnffvttvfvuu3r00UftjpnQUM+ZU6dO6YYbbtDjjz+uw4cP64EHHtBLL71kd2xJqbOnOo7t0ve5+/e//922HNnIJnu+jtWilf/Ro0d15swZ1dbWSpJqa2s1YsQItbS0yOPxJJ33mc98Rm1tberp6VFJSYl6enp05swZTZgwoVjRJUkzZ87UzJkzJUnhcFhbtmxRVVWVLl68mDSfZVmOzh7X0NCgEydOqLGxUV7v5WsAJkyY4OjsL7zwgo4dO6Z58+ZJklpbW7Vy5UqtXbtWNTU1js7+ySefyOfzacGCBZKkm266SRUVFTp+/LgmTpzo6OypjmM7jtWBz91Uz9tUx6Mdx2om2ZMtI2V/rBbtUs/x48ertbVVx44dkyS1tLQoHA7r2muvTTkvFAqppqZGu3btkiTt2rVLNTU1RfsTOO7s2bOSLv/J9bOf/Ux33HGHysrKUuZzenZJWr9+vY4cOaJNmzbJ7/f3LuP07Pfff78OHDigffv2ad++fRo/fry2bNmiW265xfHZg8Ggpk+frtdee03S5Ss1IpGIJk2a5PjsTjpWEz13sz0enZ492TLpLJdMQW7msmbNGu3du1fhcFgVFRUqLy/X7t279Yc//EG//OUv5fFcvhPNt771Ld16662SlHJeS0uLnnjiCXV2dioQCKihoUHXX399vmOnzP69731Pf/nLX3Tp0iV94Qtf0KpVq1RaWjpkPidnf//997VgwQJdd911uvrqqyVJ11xzjTZt2uT47APV1dWpsbFR1dXVrsh+6tQprVq1Sh999JF8Pp++/e1va/bs2a7I7oRjNdVzN9vj0cnZC3GscicvADAQ/8MXAAxE+QOAgSh/ADAQ5Q8ABqL8AcBAlD8AGIjyBwADUf4AYKD/B0l3Ui3/skQ5AAAAAElFTkSuQmCC\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# compare year built (x axis) with building quality type (y axis), one red dot per row\n",
+    "plt.plot(pd_train[\"yearbuilt\"], pd_train[\"buildingqualitytypeid\"], \"ro\")\n",
+    "# render the figure\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "_647tI5Lp94v"
+   },
+   "source": [
+    "### Final adjustments\n",
+    "- filling nans"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "ofZIC0EdKJ0Y"
+   },
+   "source": [
+    "# -----current: test ready-----\n",
+    "- converting to pandas \n",
+    "  - to see what's going on\n",
+    "    - figuring out what can and what can't be replicated in cuML"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "-4A3-sjRp8AE"
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn import neighbors\n",
+    "# from cuml.preprocessing.model_selection import train_test_split\n",
+    "from sklearn.model_selection import StratifiedKFold,GridSearchCV,train_test_split\n",
+    "# location seems to be related to building quality, so missing values are imputed with a KNN classifier\n",
+    "\n",
+    "def fillna_knn(df, base, target):\n",
+    "    \"\"\"Predict the missing values of `target` with a KNN classifier fit on `base`.\n",
+    "\n",
+    "    df: DataFrame holding `target` and every column named in `base`.\n",
+    "    base: list of feature column names used as KNN inputs.\n",
+    "    target: column whose nulls are predicted. The split and the CV are both\n",
+    "        stratified on it, so every class needs enough rows for each.\n",
+    "    Returns an array with one prediction per null row of `target`, in row\n",
+    "    order; `df` itself is not modified.\n",
+    "    \"\"\"\n",
+    "    missing_values_boolflag = df[target].isnull() # True for rows missing target, False for rows with a value\n",
+    "    number_of_missing_val = missing_values_boolflag.sum()\n",
+    "    print(\"# of miss\",number_of_missing_val)\n",
+    "    if number_of_missing_val == 0:\n",
+    "        # nothing to impute; skip the grid search and hand back an empty array\n",
+    "        return df.loc[missing_values_boolflag, target].values\n",
+    "\n",
+    "    not_missing_rows = df.loc[~missing_values_boolflag, [target] + base]\n",
+    "    Y = not_missing_rows[target]\n",
+    "    X = not_missing_rows[base]\n",
+    "    # 80/20 split, stratified on the target\n",
+    "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192, stratify=Y)\n",
+    "    param_grid = dict(metric=['euclidean'], weights=['distance'], n_neighbors=[5,10,15,20,25])\n",
+    "    # random_state dropped: it is unused without shuffling and newer scikit-learn rejects the combination\n",
+    "    cv = StratifiedKFold(n_splits=3, shuffle=False)\n",
+    "    grid = GridSearchCV(neighbors.KNeighborsClassifier(n_jobs=-1),param_grid=param_grid,cv=cv,scoring='f1_weighted',refit=True,return_train_score=True,verbose=1,n_jobs=-1,pre_dispatch='n_jobs')\n",
+    "    grid.fit(X_train ,Y_train)\n",
+    "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
+    "    print(\"grid.best_params_\",grid.best_params_)\n",
+    "    print(\"grid.scorer_\",grid.scorer_)\n",
+    "    # the hold-out was previously predicted but never scored; score() uses the f1_weighted scorer\n",
+    "    print(\"held-out f1_weighted\",grid.score(X_test, Y_test))\n",
+    "\n",
+    "    return grid.predict(df.loc[missing_values_boolflag, base])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 573
+    },
+    "colab_type": "code",
+    "id": "AT8Osn51lD9v",
+    "outputId": "8ab0690a-2e06-468e-b7ce-f4d051a3ce83"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CURRENT DF SITUATION\n",
+      "\n",
+      "SHAPE = (90275, 45)\n",
+      "NULL COUNT = 32911\n",
+      "VALUE COUNTS\n",
+      "7.0     29310\n",
+      "4.0     23839\n",
+      "1.0      2627\n",
+      "10.0     1461\n",
+      "12.0      119\n",
+      "8.0         5\n",
+      "6.0         2\n",
+      "11.0        1\n",
+      "Name: buildingqualitytypeid, dtype: int32\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0    null\n",
+       "1     4.0\n",
+       "2    null\n",
+       "3     4.0\n",
+       "4     7.0\n",
+       "Name: buildingqualitytypeid, dtype: float64"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print('CURRENT DF SITUATION\\n')\n",
+    "\n",
+    "print(f'SHAPE = {df_train.shape}')\n",
+    "print(f'NULL COUNT = {df_train.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{df_train.buildingqualitytypeid.value_counts()}\\n')\n",
+    "\n",
+    "df_train['buildingqualitytypeid'].head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 225
+    },
+    "colab_type": "code",
+    "id": "79bB7JKdAEtX",
+    "outputId": "32b79160-fd19-4d39-988a-fc5fcd7c3284"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NULL COUNT = 0\n",
+      "VALUE COUNTS\n",
+      "-1.0     32911\n",
+      " 7.0     29310\n",
+      " 4.0     23839\n",
+      " 1.0      2627\n",
+      " 10.0     1461\n",
+      " 12.0      119\n",
+      " 8.0         5\n",
+      " 6.0         2\n",
+      " 11.0        1\n",
+      "Name: buildingqualitytypeid, dtype: int32\n"
+     ]
+    }
+   ],
+   "source": [
+    "df_train['buildingqualitytypeid'] = df_train['buildingqualitytypeid'].fillna(-1)\n",
+    "\n",
+    "print(f'NULL COUNT = {df_train.buildingqualitytypeid.isnull().sum()}\\nVALUE COUNTS\\n{df_train.buildingqualitytypeid.value_counts()}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "DVgF1c_p_bN1"
+   },
+   "source": [
+    "# -----current: break-----\n",
+    "- break 1 of 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 903
+    },
+    "colab_type": "code",
+    "id": "6eES-hq--NKZ",
+    "outputId": "2bc86856-507d-47bf-cfab-d29649cba819"
+   },
+   "outputs": [],
+   "source": [
+    "# make safe copy (snapshot of the frame before switching libraries)\n",
+    "test = df_train.copy()\n",
+    "# switch to pandas (figuring out what's going on); to_pandas() builds a new\n",
+    "# DataFrame, so copying `test` a second time first is unnecessary\n",
+    "df_train = test.to_pandas()\n",
+    "\n",
+    "df_train.info()  # info() prints its report and returns None, so print(...) only added a stray \"None\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 762
+    },
+    "colab_type": "code",
+    "id": "mAB9bsrPAGzQ",
+    "outputId": "d847758e-212e-4de8-85c4-89b469b71c48"
+   },
+   "outputs": [],
+   "source": [
+    "# say we run this whole thing by buildingqualitytypeid\n",
+    "# drop building types that aren't seen more than 3 times in the data (the check is count > 3)\n",
+    "# pandas equivalent: df_train = df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+    "\n",
+    "# BACK TO cuDF\n",
+    "df_train = cudf.from_pandas(df_train)\n",
+    "\n",
+    "print(df_train.buildingqualitytypeid.value_counts())\n",
+    "print()\n",
+    "print(df_train.buildingqualitytypeid.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "print()\n",
+    "\n",
+    "type_ids = list(set(df_train.buildingqualitytypeid.values))\n",
+    "from time import sleep  # not used in this cell any more; kept in case later cells rely on the name\n",
+    "safe = []\n",
+    "for tid in type_ids:\n",
+    "  print(tid)\n",
+    "  # row count for this code (the 5 s debugging pause per code was removed)\n",
+    "  t = len(df_train.loc[df_train.buildingqualitytypeid == tid])\n",
+    "  if t > 3:\n",
+    "    safe.append(tid)\n",
+    "  else:\n",
+    "    print(f'{tid} count too low @ {t}')\n",
+    "for tid in type_ids:\n",
+    "  if tid not in safe:\n",
+    "    df_train = df_train.loc[df_train.buildingqualitytypeid != tid]\n",
+    "\n",
+    "print()\n",
+    "print(df_train.buildingqualitytypeid.value_counts())\n",
+    "print()\n",
+    "\n",
+    "df_train['buildingqualitytypeid'] = df_train['buildingqualitytypeid'].replace(-1,np.nan)\n",
+    "print(df_train.buildingqualitytypeid.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "\n",
+    "# BACK TO PANDAS\n",
+    "df_train = df_train.to_pandas()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Zl7eXGt_g1uU"
+   },
+   "source": [
+    "# -----current: break-----\n",
+    "- break 2 of 2\n",
+    "  - below is last cell run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 557
+    },
+    "colab_type": "code",
+    "id": "Q3ZBSOHm-79A",
+    "outputId": "e9ddb9b3-0bb0-4cf7-fa8e-ca35b9ea7f46"
+   },
+   "outputs": [],
+   "source": [
+    "# pandas version of the cuDF cell above (currently broken); works whether df_train is cuDF or pandas\n",
+    "not_df_train = df_train.to_pandas() if hasattr(df_train, 'to_pandas') else df_train.copy()\n",
+    "# groupby() leaves NaN keys out, so filter() would drop every row to impute: use the -1 sentinel while filtering\n",
+    "not_df_train['buildingqualitytypeid'] = not_df_train['buildingqualitytypeid'].fillna(-1)\n",
+    "not_df_train = not_df_train.groupby(\"buildingqualitytypeid\").filter(lambda x: x.buildingqualitytypeid.size > 3)\n",
+    "not_df_train['buildingqualitytypeid'] = not_df_train['buildingqualitytypeid'].replace(-1,np.nan)\n",
+    "\n",
+    "missing_values = fillna_knn(not_df_train, base = ['latitude', 'longitude'], target = 'buildingqualitytypeid')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = not_df_train['buildingqualitytypeid'].isnull()\n",
+    "not_df_train.loc[missing_values_boolflag, 'buildingqualitytypeid'] = missing_values\n",
+    "print(not_df_train.buildingqualitytypeid.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "bgXh5OATEacY"
+   },
+   "source": [
+    "# Cells below were NOT (really) run\n",
+    "- if they were run, it was with pandas rather than cuDF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "colab_type": "code",
+    "id": "oTh_XPErqkHf",
+    "outputId": "3e667bca-70c5-4b66-c7d2-12d171cb140b"
+   },
+   "outputs": [],
+   "source": [
+    "# null count and shape before imputation\n",
+    "print(df_train['heating_system_id'].isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "# work on a copy: NaN -> -1 so groupby keeps those rows, drop codes with 3 or fewer rows, then -1 -> NaN\n",
+    "temp = df_train.copy()\n",
+    "temp['heating_system_id'] = temp['heating_system_id'].fillna(-1)\n",
+    "temp = temp.groupby(\"heating_system_id\").filter(lambda grp: len(grp) > 3)\n",
+    "temp['heating_system_id'] = temp['heating_system_id'].replace(-1,np.nan)\n",
+    "print(temp['heating_system_id'].isnull().sum())\n",
+    "print(temp.shape)\n",
+    "\n",
+    "# KNN on location predicts a code for every null row of temp\n",
+    "missing_values = fillna_knn(temp, base=['latitude', 'longitude'], target='heating_system_id')\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "\n",
+    "# store the predictions in the rows of df_train that are null\n",
+    "missing_values_boolflag = df_train['heating_system_id'].isnull()\n",
+    "df_train.loc[missing_values_boolflag, 'heating_system_id'] = missing_values\n",
+    "print(df_train['heating_system_id'].isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "colab_type": "code",
+    "id": "oVjNSkUYqnCt",
+    "outputId": "80fc7e87-36cd-44b7-96e9-ef0631c7d10c"
+   },
+   "outputs": [],
+   "source": [
+    "# Impute ac_id with fillna_knn, using latitude/longitude as predictors.\n",
+    "print(df_train['ac_id'].isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "\n",
+    "# NaN is parked under a -1 sentinel while grouping so the missing rows form a\n",
+    "# group of their own; categories with 3 or fewer rows are filtered out.\n",
+    "temp = df_train.assign(ac_id=df_train['ac_id'].fillna(-1))\n",
+    "temp = temp.groupby('ac_id').filter(lambda grp: len(grp) > 3)\n",
+    "temp['ac_id'] = temp['ac_id'].replace(-1, np.nan)\n",
+    "print(temp['ac_id'].isnull().sum())\n",
+    "print(temp.shape)\n",
+    "\n",
+    "missing_values = fillna_knn(\n",
+    "    temp,\n",
+    "    base=['latitude', 'longitude'],\n",
+    "    target='ac_id',\n",
+    ")\n",
+    "\n",
+    "print(\"predicted output shape\", missing_values.shape)\n",
+    "# Write the predictions into the rows of df_train that are still null.\n",
+    "missing_values_boolflag = df_train['ac_id'].isnull()\n",
+    "df_train.loc[missing_values_boolflag, 'ac_id'] = missing_values\n",
+    "\n",
+    "print(df_train['ac_id'].isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 278
+    },
+    "colab_type": "code",
+    "id": "qTbcYbexqr0Y",
+    "outputId": "3459affa-a41a-4241-ab62-f0dfcadda039"
+   },
+   "outputs": [],
+   "source": [
+    "# Impute yearbuilt with fillna_knn from location, building quality and land-use type.\n",
+    "print(df_train['yearbuilt'].isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "\n",
+    "# NaN is parked under a -1 sentinel while grouping so the missing rows form a\n",
+    "# group of their own; years with 3 or fewer rows are filtered out.\n",
+    "temp = df_train.assign(yearbuilt=df_train['yearbuilt'].fillna(-1))\n",
+    "temp = temp.groupby('yearbuilt').filter(lambda grp: len(grp) > 3)\n",
+    "temp['yearbuilt'] = temp['yearbuilt'].replace(-1, np.nan)\n",
+    "print(temp['yearbuilt'].isnull().sum())\n",
+    "print(temp.shape)\n",
+    "\n",
+    "missing_values = fillna_knn(\n",
+    "    temp,\n",
+    "    base=['latitude', 'longitude', 'buildingqualitytypeid', 'propertylandusetypeid'],\n",
+    "    target='yearbuilt',\n",
+    ")\n",
+    "\n",
+    "print(\"predicted output shape\", missing_values.shape)\n",
+    "# Write the predictions into the rows of df_train that are still null.\n",
+    "missing_values_boolflag = df_train['yearbuilt'].isnull()\n",
+    "df_train.loc[missing_values_boolflag, 'yearbuilt'] = missing_values\n",
+    "print(df_train['yearbuilt'].isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Gx1LYGmfqxLk"
+   },
+   "outputs": [],
+   "source": [
+    "#location seems to be related to building quality, (knnregressor)\n",
+    "from sklearn.model_selection import KFold\n",
+    "\n",
+    "def fillna_knnr( df, base, target):\n",
+    "    \"\"\"Predict the missing values of `target` with a grid-searched KNN regressor.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : DataFrame holding the `target` column and every column named in `base`.\n",
+    "    base : list of column names used as predictors.\n",
+    "    target : name of the column whose null entries are to be predicted.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    Array with one prediction per row of `df` where `target` is null, in row\n",
+    "    order (empty when nothing is missing). `df` itself is not modified.\n",
+    "    \"\"\"\n",
+    "    import numpy as np\n",
+    "\n",
+    "    data_colnames = [ target ] + base\n",
+    "    missing_values_boolflag = df[target].isnull() #true for missing rows, false for rows with values\n",
+    "    not_missing_boolflag = ~missing_values_boolflag\n",
+    "    number_of_missing_val = missing_values_boolflag.sum()\n",
+    "    print(\"# of miss\",number_of_missing_val)\n",
+    "    if number_of_missing_val == 0:\n",
+    "        # Nothing to impute: skip the grid search, predict() rejects an empty input.\n",
+    "        return np.empty(0)\n",
+    "    not_missing_rows = df.loc[ not_missing_boolflag, data_colnames]\n",
+    "    Y = not_missing_rows[target]\n",
+    "    X = not_missing_rows[base]\n",
+    "    # 80/20 split of the known rows: the grid search sees the 80%, the rest is a hold-out check.\n",
+    "    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3192)\n",
+    "    param_grid = dict(metric=['euclidean'],\n",
+    "                      weights=['distance'],\n",
+    "                      n_neighbors=[5,10,15,20,25])\n",
+    "    # random_state is meaningless without shuffling and is rejected by newer scikit-learn.\n",
+    "    cv = KFold(n_splits=3,shuffle=False)\n",
+    "    grid = GridSearchCV(neighbors.KNeighborsRegressor(n_jobs=-1),param_grid=param_grid,cv=cv,scoring='neg_mean_absolute_error',refit=True,return_train_score=True,verbose=1,n_jobs=-1,pre_dispatch='n_jobs')\n",
+    "    grid.fit(X_train ,Y_train)\n",
+    "    print(\"grid.best_estimator_\",grid.best_estimator_)\n",
+    "    print(\"grid.best_params_\",grid.best_params_)\n",
+    "    print(\"grid.scorer_\",grid.scorer_)\n",
+    "    # Report the error on the hold-out rows instead of discarding the prediction.\n",
+    "    print(\"hold-out MAE\",np.mean(np.abs(np.asarray(Y_test) - grid.predict(X_test))))\n",
+    "    return grid.predict(df.loc[missing_values_boolflag, base])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 606
+    },
+    "colab_type": "code",
+    "id": "pj5PXm7ozg5l",
+    "outputId": "3d42279f-221c-444c-8795-05a0832f97cd"
+   },
+   "outputs": [],
+   "source": [
+    "#garage_sqft\n",
+    "# Impute garage_sqft with fillna_knnr, but only for parcels that have a garage (garagecarcnt > 0).\n",
+    "print(df_train.garage_sqft.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "has_garage = df_train.garagecarcnt > 0\n",
+    "temp=df_train.loc[has_garage,df_train.columns].copy()\n",
+    "\n",
+    "print(temp.garage_sqft.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = [ 'latitude', 'longitude','garagecarcnt'] ,\n",
+    "                  target = 'garage_sqft')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "# Predictions exist only for the null rows of `temp`, so the write-back mask must be\n",
+    "# limited to the same rows; masking every null row of df_train can mismatch in length.\n",
+    "missing_values_boolflag = df_train['garage_sqft'].isnull() & has_garage\n",
+    "df_train.loc[missing_values_boolflag, 'garage_sqft'] = missing_values\n",
+    "print(df_train.garage_sqft.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "b7e5CFTyzg_M"
+   },
+   "outputs": [],
+   "source": [
+    "df_train = df_train.drop('parcelid', axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "YxGquCOOzhD7"
+   },
+   "outputs": [],
+   "source": [
+    "#All the other columns with missing values seems to be  integer, will need regression to be imputed,\n",
+    "#time to get categorical variables hot encoded\n",
+    "\n",
+    "#Identify numerical columns to produce a heatmap\n",
+    "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips', 'heating_system_id','has_hottub_or_spa',\n",
+    "          'just_hottub_or_spa', 'pool_with_spa_tub_yes','pool_with_spa_tub_no','propertylandusetypeid','basement_flag'\n",
+    "          ,'fireplaceflag','taxdelinquencyflag']\n",
+    "numcols = [x for x in df_train.columns if x not in catcols]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "uVZkszJEzhHj"
+   },
+   "outputs": [],
+   "source": [
+    "#total_finished_living_area_sqft\n",
+    "\n",
+    "print(df_train.total_finished_living_area_sqft.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.total_finished_living_area_sqft.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = [ 'latitude', 'longitude','basementsqft','numberofstories','poolcnt','garagecarcnt','garage_sqft','propertylandusetypeid'] ,\n",
+    "                  target = 'total_finished_living_area_sqft')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['total_finished_living_area_sqft'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'total_finished_living_area_sqft' ] = missing_values\n",
+    "print(df_train.total_finished_living_area_sqft.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "CVrTMb92zhLX"
+   },
+   "outputs": [],
+   "source": [
+    "#total_bath\t1165\n",
+    "#full_bath\t1182\n",
+    "#half_bath\t1182\n",
+    "#roomcnt\t1416\n",
+    "#bedroomcnt\t1421\n",
+    "\n",
+    "# Impute total_bath with fillna_knnr from land-use type and finished living area.\n",
+    "# (The cell body used to be pasted twice; the second pass re-ran the imputer with\n",
+    "# no missing rows left, so only one copy is kept.)\n",
+    "\n",
+    "print(df_train.total_bath.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.total_bath.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = ['propertylandusetypeid','total_finished_living_area_sqft' ] ,\n",
+    "                  target = 'total_bath')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['total_bath'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'total_bath' ] = missing_values\n",
+    "print(df_train.total_bath.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "BjIKlu-tzhPI"
+   },
+   "outputs": [],
+   "source": [
+    "# rop half_bath and full bath, as there are only redundant values of total_bath\n",
+    "df_train = df_train.drop(['full_bath','half_bath'], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "02X1y6EBzhT9"
+   },
+   "outputs": [],
+   "source": [
+    "#bedroomcnt\t1421\n",
+    "\n",
+    "print(df_train.bedroomcnt.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.bedroomcnt.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = ['propertylandusetypeid','total_finished_living_area_sqft','total_bath' ] ,\n",
+    "                  target = 'bedroomcnt')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['bedroomcnt'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'bedroomcnt' ] = missing_values\n",
+    "print(df_train.bedroomcnt.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "WzkZ_qeHzhXP"
+   },
+   "outputs": [],
+   "source": [
+    "df_train['total_bath']=df_train.total_bath.round(1)\n",
+    "df_train['bedroomcnt']=df_train.bedroomcnt.round(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "QF9DtDAczhaW"
+   },
+   "outputs": [],
+   "source": [
+    "#recalculate roomcnt\t1416 as we have used imputation for total_bath and bedroomcnt\n",
+    "\n",
+    "df_train.loc[(df_train.roomcnt.isnull()),['roomcnt']]=df_train.total_bath + df_train.bedroomcnt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "U5N41TBlz60W"
+   },
+   "outputs": [],
+   "source": [
+    "# Keep only the rows where both total_parcel_tax and land_tax are present.\n",
+    "print(df_train.shape)\n",
+    "df_train = df_train.dropna(subset=['total_parcel_tax', 'land_tax'])\n",
+    "\n",
+    "print(df_train.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "kv9h5yL3z64Q"
+   },
+   "outputs": [],
+   "source": [
+    "#lot_area_sqft\n",
+    "print(df_train.lot_area_sqft.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.lot_area_sqft.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = ['latitude','longitude','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+    "                  target = 'lot_area_sqft')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['lot_area_sqft'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'lot_area_sqft' ] = missing_values.round(2)\n",
+    "print(df_train.lot_area_sqft.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "GYJLHrR4z68f"
+   },
+   "outputs": [],
+   "source": [
+    "# predict structure_tax and recalculate  total_parcel_tax = land_tax + structure_tax\n",
+    "\n",
+    "\n",
+    "print(df_train.structure_tax.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.structure_tax.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+    "                  target = 'structure_tax')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['structure_tax'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'structure_tax' ] = missing_values.round(2)\n",
+    "print(df_train.structure_tax.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Ya-3K06Zz6_y"
+   },
+   "outputs": [],
+   "source": [
+    "#36 total_property_tax_2016 \n",
+    "\n",
+    "#total_parcel_tax = land_tax + structure_tax\n",
+    "    \n",
+    "df_train['total_parcel_tax']=df_train['structure_tax']+df_train['land_tax']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "8Fvr7voVz7DX"
+   },
+   "outputs": [],
+   "source": [
+    "#age of the property\n",
+    "df_train['age'] = 2016 - df_train['yearbuilt']\n",
+    "df_train=df_train.drop(['yearbuilt'],axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "xl0EOIT-z7Gl"
+   },
+   "outputs": [],
+   "source": [
+    "#total_property_tax_2016\n",
+    "\n",
+    "\n",
+    "print(df_train.total_property_tax_2016.isnull().sum())\n",
+    "print(df_train.shape)\n",
+    "temp=df_train.copy()\n",
+    "print(temp.total_property_tax_2016.isnull().sum())\n",
+    "print(temp.shape)\n",
+    "missing_values=fillna_knnr(temp,\n",
+    "                  base = ['latitude','longitude','lot_area_sqft','propertylandusetypeid','total_finished_living_area_sqft','roomcnt','numberofstories' ] ,\n",
+    "                  target = 'total_property_tax_2016')\n",
+    "\n",
+    "print(\"predicted output shape\",missing_values.shape)\n",
+    "missing_values_boolflag = df_train['total_property_tax_2016'].isnull()\n",
+    "df_train.loc[ missing_values_boolflag, 'total_property_tax_2016' ] = missing_values.round(2)\n",
+    "print(df_train.total_property_tax_2016.isnull().sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "YlaxWegqz7I-"
+   },
+   "outputs": [],
+   "source": [
+    "# Per-column null counts, restricted to columns that still have missing values,\n",
+    "# sorted from fewest to most missing.\n",
+    "missing_df = (\n",
+    "    df_train.isnull()\n",
+    "    .sum(axis=0)\n",
+    "    .rename_axis('column_name')\n",
+    "    .reset_index(name='missing_count')\n",
+    "    .query('missing_count > 0')\n",
+    "    .sort_values(by='missing_count')\n",
+    ")\n",
+    "print(missing_df)\n",
+    "print(missing_df.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "dIl_nqKVz7NQ"
+   },
+   "outputs": [],
+   "source": [
+    "#both the columns above miss 92% of the data, there is no related varibale to impute it, hence dropping them at this point\n",
+    "\n",
+    "df_train = df_train.drop(['finished_living_area_entryfloor_sqft2','finished_living_area_entryfloor_sqft1'], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "HQJd7rgKz7Qq"
+   },
+   "outputs": [],
+   "source": [
+    "#Identify numerical columns to produce a heatmap\n",
+    "catcols = ['ac_id','buildingqualitytypeid','deck_flag','fips','pool_with_spa_tub_no','pool_with_spa_tub_yes','has_hottub_or_spa',\n",
+    "           'just_hottub_or_spa','heating_system_id','propertylandusetypeid','basement_flag','fireplaceflag','taxdelinquencyflag']\n",
+    "numcols = [x for x in df_train.columns if x not in catcols]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "VUN3a6uJz7Ut"
+   },
+   "outputs": [],
+   "source": [
+    "# 2 variables are in object datatype, coverting into numeric\n",
+    "df_train[['census_tractnumber','block_number']] = df_train[['census_tractnumber','block_number']].apply(pd.to_numeric)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "zGx77rRAz7ZZ"
+   },
+   "outputs": [],
+   "source": [
+    "# Drop the categorical columns, as the xgboost feature selection cannot handle them,\n",
+    "# and cast both the features and the logerror target to float.\n",
+    "train_x = df_train.drop(catcols + ['logerror'], axis=1).astype(float)\n",
+    "train_y = df_train['logerror'].astype(float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "es_Ew2YJz7dT"
+   },
+   "outputs": [],
+   "source": [
+    "pd.options.display.max_rows = 65\n",
+    "\n",
+    "dtype_df = train_x.dtypes.reset_index()\n",
+    "dtype_df.columns = [\"Count\", \"Column Type\"]\n",
+    "#dtype_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "bvWIhR38z7fW"
+   },
+   "outputs": [],
+   "source": [
+    "# Recode the 0/1 (or False/True) indicator columns as \"No\"/\"Yes\" labels.\n",
+    "for flag_col in ('has_hottub_or_spa', 'just_hottub_or_spa', 'deck_flag',\n",
+    "                 'basement_flag', 'fireplaceflag'):\n",
+    "    df_train.loc[df_train[flag_col] == 1, flag_col] = \"Yes\"\n",
+    "    df_train.loc[df_train[flag_col] == 0, flag_col] = \"No\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Ef9JjrmMz7jw"
+   },
+   "outputs": [],
+   "source": [
+    "#ac_id,heating_system_id,propertylandusetypeid\n",
+    "dummieslist=['has_hottub_or_spa','just_hottub_or_spa',\n",
+    "             'deck_flag','fips','basement_flag','fireplaceflag','taxdelinquencyflag']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Z51Zrt2Uz7oD"
+   },
+   "outputs": [],
+   "source": [
+    "df_train[dummieslist] = df_train[dummieslist].astype(object)\n",
+    "dummies = pd.get_dummies(df_train[dummieslist], prefix= dummieslist)\n",
+    "dummies.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "VHBi5Gg6z7tu"
+   },
+   "outputs": [],
+   "source": [
+    "dummies2=['pool_with_spa_tub_no','pool_with_spa_tub_yes']\n",
+    "df_train[dummies2] = df_train[dummies2].astype(int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "oocTPKI9z7rk"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "from urllib.parse import quote_plus\n",
+    "\n",
+    "import MySQLdb\n",
+    "from sqlalchemy import create_engine\n",
+    "\n",
+    "# Never keep the database password in the notebook: it is read from the\n",
+    "# MYSQL_PASSWORD environment variable, or prompted for when that is unset.\n",
+    "db_password = os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password for root@localhost: ')\n",
+    "engineString = 'mysql+mysqldb://root:{}@localhost/sakila'.format(quote_plus(db_password))\n",
+    "engine = create_engine(engineString)\n",
+    "\n",
+    "# Write df_train to the df_train_f1 table (replacing it if present) in chunks of 10000 rows.\n",
+    "# engine.begin() provides one connection inside a transaction and releases it on exit,\n",
+    "# so no stray connection is left open and the write goes through that connection.\n",
+    "with engine.begin() as con:\n",
+    "    df_train.to_sql('df_train_f1', con, chunksize=10000, index=False, if_exists='replace')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "zj5ZLSPlz7XC"
+   },
+   "outputs": [],
+   "source": [
+    "numcols2=['basementsqft','total_bath','bedroomcnt','total_finished_living_area_sqft','fireplace_count','garagecarcnt',\n",
+    " 'garage_sqft','latitude','longitude','lot_area_sqft','poolcnt','pool_sqft','roomcnt','unitcnt','patio_sqft','storage_sqft',\n",
+    " 'numberofstories','structure_tax','total_parcel_tax','land_tax','total_property_tax_2016','taxdelinquencyyear','transaction_month',\n",
+    " 'census_tractnumber','block_number','age']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "fp53dotszhgA"
+   },
+   "outputs": [],
+   "source": [
+    "Y=df_train['logerror']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "O0Uaei4rzhj6"
+   },
+   "outputs": [],
+   "source": [
+    "#buildingqualitytypeid ->has order\n",
+    "le = LabelEncoder()\n",
+    "df_train['buildingqualitytypeid']=le.fit_transform(df_train.buildingqualitytypeid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "g4-g-uvtzhds"
+   },
+   "outputs": [],
+   "source": [
+    "#df_train.ac_id.value_counts()\n",
+    "#df_train.propertylandusetypeid.value_counts()\n",
+    "#'buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "SzliXafdzhRd"
+   },
+   "outputs": [],
+   "source": [
+    "X=pd.concat([dummies,df_train[dummies2],df_train[numcols2],df_train[['buildingqualitytypeid','ac_id','heating_system_id','propertylandusetypeid']]],axis=1)\n",
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "DBsZjyQd0W1N"
+   },
+   "outputs": [],
+   "source": [
+    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.10, random_state=3192)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ihXFZWcn0W5D"
+   },
+   "outputs": [],
+   "source": [
+    "#  top features\n",
+    "import xgboost as xgb\n",
+    "xgb_params = {\n",
+    "    'eta': 0.05,\n",
+    "    'max_depth': 8,\n",
+    "    'subsample': 0.7,\n",
+    "    'colsample_bytree': 0.7,\n",
+    "    'objective': 'reg:linear',\n",
+    "    'silent': 1,\n",
+    "    'seed' : 0\n",
+    "}\n",
+    "dtrain = xgb.DMatrix(X_train, Y_train, feature_names=X_train.columns.values)\n",
+    "model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=50)\n",
+    "# plot the important features #\n",
+    "fig, ax = plt.subplots(figsize=(12,18))\n",
+    "#max_num_features=50, error for no reason \n",
+    "xgb.plot_importance(model, height=0.8, ax=ax)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "TQEEzNkX0W9w"
+   },
+   "outputs": [],
+   "source": [
+    "#top features\n",
+    "xgboost_selection=['total_finished_living_area_sqft','latitude','structure_tax','total_property_tax_2016',\n",
+    "'total_parcel_tax','land_tax','longitude','lot_area_sqft','census_tractnumber','age','total_bath','bedroomcnt',\n",
+    "'block_number','transaction_month','roomcnt','taxdelinquencyyear','unitcnt','taxdelinquencyflag_No',\n",
+    "'fips_LA','garage_sqft','pool_with_spa_tub_no','has_hottub_or_spa_No','garagecarcnt','deck_flag_No',\n",
+    "'poolcnt','pool_sqft'\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "Rr_6EO4G0XEj"
+   },
+   "outputs": [],
+   "source": [
+    "# feature selection\n",
+    "#c_id,heating_system_id,propertylandusetypeid\n",
+    "from sklearn.ensemble import ExtraTreesRegressor\n",
+    "from sklearn.feature_selection import SelectFromModel\n",
+    "reg = ExtraTreesRegressor(n_estimators=500, max_depth=8, max_features='sqrt',\n",
+    "                          min_samples_split=100 ,min_samples_leaf=10, bootstrap=True,n_jobs=-1, random_state=3192)\n",
+    "reg = reg.fit(X_train, Y_train)\n",
+    "#print(\"importance\",reg.feature_importances_) \n",
+    "model = SelectFromModel(reg, prefit=True)\n",
+    "X_new = model.transform(X_train)\n",
+    "print(X_train.shape)\n",
+    "print(X_new.shape)  \n",
+    "\n",
+    "feat_names = X.columns.values\n",
+    "importances = reg.feature_importances_\n",
+    "std = np.std([tree.feature_importances_ for tree in reg.estimators_], axis=0)\n",
+    "indices = np.argsort(importances)[::-1][:26]\n",
+    "plt.figure(figsize=(12,12))\n",
+    "plt.title(\"Feature importances\")\n",
+    "plt.bar(range(len(indices)), importances[indices], color=\"r\", yerr=std[indices], align=\"center\")\n",
+    "plt.xticks(range(len(indices)), feat_names[indices], rotation='vertical')\n",
+    "plt.xlim([-1, len(indices)])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "i4FCNOG70XIU"
+   },
+   "outputs": [],
+   "source": [
+    "tree_selection=[\n",
+    "    'total_finished_living_area_sqft','structure_tax','total_property_tax_2016','total_bath','total_parcel_tax',\n",
+    "    'age','latitude','census_tractnumber','bedroomcnt','longitude','land_tax','propertylandusetypeid','block_number',\n",
+    "    'buildingqualitytypeid','numberofstories','heating_system_id','unitcnt','transaction_month','lot_area_sqft','roomcnt',\n",
+    "    'garage_sqft','garagecarcnt','pool_with_spa_tub_no','poolcnt','fips_LA','taxdelinquencyyear','patio_sqft',\n",
+    "    'taxdelinquencyflag_No','taxdelinquencyflag_Yes'\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "TmIS1WAS0XMW"
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn.linear_model import Ridge,Lasso\n",
+    "from sklearn.feature_selection import RFECV\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer\n",
+    "\n",
+    "# Alternative estimators that were tried for the elimination:\n",
+    "#model=Lasso(alpha=0.2, fit_intercept=True, normalize=True, precompute=False, copy_X=True,\n",
+    " #                                max_iter=1000, \n",
+    "  #                               tol=0.0001, warm_start=False, positive=False, random_state=3192, selection='cyclic')\n",
+    "\n",
+    "#Ridge(random_state=3192,solver='auto',fit_intercept=True,normalize=True,alpha=0.1)\n",
+    "#LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True)\n",
+    "\n",
+    "\n",
+    "# Recursive feature elimination with 4-fold CV, removing 2 features per step and\n",
+    "# scoring each subset by negative mean absolute error.\n",
+    "rfecv = RFECV(estimator=LinearRegression(n_jobs=-1,fit_intercept=True, normalize=True, copy_X=True), step=2, cv=KFold(4),scoring='neg_mean_absolute_error')\n",
+    "rfecv.fit(X_train, Y_train)\n",
+    "\n",
+    "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n",
+    "\n",
+    "# Plot number of features VS. cross-validation scores\n",
+    "plt.figure()\n",
+    "plt.xlabel(\"Number of features selected\")\n",
+    "\n",
+    "# This is a regression scored with neg_mean_absolute_error, not a classification accuracy.\n",
+    "plt.ylabel(\"Cross validation score (negative mean absolute error)\")\n",
+    "plt.plot(range(1, len(rfecv.grid_scores_) + 1), rfecv.grid_scores_)\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "DIw8O00U0XPR"
+   },
+   "outputs": [],
+   "source": [
+    "# Names of the columns that RFECV kept (support_ is its boolean selection mask).\n",
+    "rfe_selection = X.columns[rfecv.support_].tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "gHA0x5_80XWy"
+   },
+   "outputs": [],
+   "source": [
+    "#Linear regression with rfe_selection selection\n",
+    "#rfe_selection, tree_selection, xgboost_selection\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import r2_score,mean_absolute_error,make_scorer,mean_squared_error\n",
+    "\n",
+    "# just to check whether normalized /not normalized data gives better results\n",
+    "parameters = {'fit_intercept':[True], 'normalize':[True,False], 'copy_X':[True]}\n",
+    "scoring = {'MAE':'neg_mean_absolute_error','MSE': make_scorer(mean_squared_error,greater_is_better=False)}\n",
+    "\n",
+    "grid1 = GridSearchCV(LinearRegression(n_jobs=-1),param_grid=parameters, scoring=scoring,cv=5,refit='MAE',\n",
+    "                    return_train_score=True,\n",
+    "                    verbose=0,n_jobs=-1,pre_dispatch='n_jobs')\n",
+    "\n",
+    "grid1.fit(X_train[rfe_selection], Y_train)\n",
+    "#print(\"5. grid best_score_\",abs(grid.best_score_))\n",
+    "Y_pred = grid1.predict(X_test[rfe_selection])\n",
+    "print(\"MAE on test data\",mean_absolute_error(Y_test,Y_pred))\n",
+    "print(\"MSE on test data\",mean_squared_error(Y_test,Y_pred))\n",
+    "print(\"R Squared data \",r2_score(Y_test,Y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ekn4pBs60XcT"
+   },
+   "outputs": [],
+   "source": [
+    "#pca selection\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.preprocessing import scale\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# Standardise the full feature matrix, then fit a PCA that keeps every component.\n",
+    "scaled_x = scale(X)\n",
+    "pca = PCA(\n",
+    "    n_components=None,\n",
+    "    copy=True,\n",
+    "    whiten=False,\n",
+    "    svd_solver='auto',\n",
+    "    tol=0.0,\n",
+    "    iterated_power='auto',\n",
+    "    random_state=None,\n",
+    ")\n",
+    "pca.fit(scaled_x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "yFuT-wUN0XfV"
+   },
+   "outputs": [],
+   "source": [
+    "# Fraction of the total variance explained by each principal component.\n",
+    "var = pca.explained_variance_ratio_\n",
+    "# Running total of explained variance in percent (each ratio is rounded to 4 decimals first).\n",
+    "var1 = np.cumsum(np.round(var, decimals=4) * 100)\n",
+    "print(var1)\n",
+    "plt.plot(var1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "iPN4OBUe0XlD"
+   },
+   "outputs": [],
+   "source": [
+    "# Based on the cumulative explained-variance plot, keep the first 28 principal components.\n",
+    "# fit_transform() fits the model and projects the data in one pass, so no separate fit() call\n",
+    "# is made beforehand (it would only repeat the same decomposition).\n",
+    "\n",
+    "# Projection without whitening.\n",
+    "pca = PCA(n_components=28, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
+    "pca1 = pca.fit_transform(scaled_x)\n",
+    "\n",
+    "# Projection with whitening; `pca` stays bound to this second model after the cell runs.\n",
+    "pca = PCA(n_components=28, copy=True, whiten=True, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)\n",
+    "pca2 = pca.fit_transform(scaled_x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "EE4ednPC0XjX"
+   },
+   "outputs": [],
+   "source": [
+    "# Hold out 10% of the rows, using the same test_size and random_state for the\n",
+    "# non-whitened (pca1) and whitened (pca2) projections.\n",
+    "pcaX_train, pcaX_test, pcaY_train, pcaY_test = train_test_split(\n",
+    "    pca1, Y, test_size=0.10, random_state=3192\n",
+    ")\n",
+    "pca2X_train, pca2X_test, pca2Y_train, pca2Y_test = train_test_split(\n",
+    "    pca2, Y, test_size=0.10, random_state=3192\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "erYMXvTG0XaK"
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import GradientBoostingRegressor\n",
+    "# mean_squared_error is imported here because the final print below uses it.\n",
+    "from sklearn.metrics import mean_absolute_error,mean_squared_error,make_scorer\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "\n",
+    "# Gradient boosting on the 28 non-whitened PCA components (pcaX_train / pcaX_test).\n",
+    "# Single-point grid: 1200 trees, paired with learning_rate=0.005 on the estimator below.\n",
+    "param_grid={'n_estimators':[1200],'max_features':[22]}\n",
+    "\n",
+    "# refit='MAE' only selects a metric when `scoring` defines one under that name; without it,\n",
+    "# best_score_ would be the estimator's default score (R^2) instead of an MAE.\n",
+    "grid13 = GridSearchCV(GradientBoostingRegressor(subsample=0.8,min_samples_leaf=50,min_samples_split=50,max_depth=9,loss='ls',criterion='friedman_mse',learning_rate=0.005,random_state=3192),\n",
+    "                     param_grid=param_grid, scoring={'MAE':'neg_mean_absolute_error'}, cv=5,refit='MAE',\n",
+    "                    return_train_score=True,\n",
+    "                    verbose=2,n_jobs=-1,pre_dispatch='n_jobs')\n",
+    "\n",
+    "grid13.fit(pcaX_train, pcaY_train)\n",
+    "# best_score_ is the negated cross-validated MAE, so abs() reports it as a positive error.\n",
+    "print(\"5. grid best_score_\",abs(grid13.best_score_))\n",
+    "print(\"best params\",grid13.best_params_)\n",
+    "print(\"best score\",grid13.best_score_)\n",
+    "\n",
+    "# Evaluate the refit model on the held-out PCA split.\n",
+    "Y_pred = grid13.predict(pcaX_test)\n",
+    "print(\"MAE on test data\",mean_absolute_error(pcaY_test,Y_pred))\n",
+    "print(\"MSE on test data\",mean_squared_error(pcaY_test,Y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "BgtbLCcR0XUx"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "FjdSCEFP0XCM"
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "WzATgLxmam5w"
+   },
+   "source": [
+    "In this competition, Zillow is asking you to predict the log-error between their Zestimate and the actual sale price, given all the features of a home. The log error is defined as\n",
+    "\n",
+    "$$\\text{logerror} = \\log(\\text{Zestimate}) - \\log(\\text{SalePrice})$$\n",
+    "\n",
+    "and it is recorded in the transactions file train.csv. In this competition, you are going to predict the logerror for the months in Fall 2017. Since all the real estate transactions in the U.S. are publicly available, we will close the competition (no longer accepting submissions) before the evaluation period begins.\n",
+    "\n",
+    "## Train/Test split\n",
+    "\n",
+    "- You are provided with a full list of real estate properties in three counties (Los Angeles, Orange, and Ventura, California) for 2016.\n",
+    "- The train data has all the transactions before October 15, 2016, plus some of the transactions after October 15, 2016.\n",
+    "- The test data in the public leaderboard has the rest of the transactions between October 15 and December 31, 2016.\n",
+    "- The rest of the test data, which is used for calculating the private leaderboard, is all the transactions from October 15, 2017, to December 15, 2017. This period is called the \"sales tracking period\", during which we will not be taking any submissions.\n",
+    "- You are asked to predict 6 time points for all properties: October 2016 (201610), November 2016 (201611), December 2016 (201612), October 2017 (201710), November 2017 (201711), and December 2017 (201712).\n",
+    "- Not all the properties are sold in each time period. If a property was not sold in a certain time period, that particular row will be ignored when calculating your score.\n",
+    "- If a property is sold multiple times within 31 days, we take the first reasonable value as the ground truth. By \"reasonable\", we mean that if the data seems wrong, we will take the transaction that has a value that makes more sense.\n",
+    "\n",
+    "\n",
+    "## File descriptions\n",
+    "\n",
+    "- `properties_2016.csv` - all the properties with their home features for 2016. Note: Some 2017 new properties don't have any data yet except for their parcelids. Those data points should be populated when `properties_2017.csv` is available.\n",
+    "- `properties_2017.csv` - all the properties with their home features for 2017 (released on 10/2/2017)\n",
+    "- `train_2016.csv` - the training set with transactions from 1/1/2016 to 12/31/2016\n",
+    "- `train_2017.csv` - the training set with transactions from 1/1/2017 to 9/15/2017 (released on 10/2/2017)\n",
+    "- `sample_submission.csv` - a sample submission file in the correct format"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "zillow_kaggle_zestimate_comp.ipynb",
+   "provenance": [],
+   "version": "0.3.2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}