Skip to content

Commit

Permalink
Merge pull request #90 from arangoml/release_0.0.6.8.6
Browse files Browse the repository at this point in the history
Changed pip install from testpypi to pypi.
  • Loading branch information
rajivsam authored Feb 17, 2020
2 parents 93c8987 + 224d1b4 commit 4569b74
Show file tree
Hide file tree
Showing 5 changed files with 561 additions and 950 deletions.
74 changes: 37 additions & 37 deletions examples/Arangopipe_Feature_Example_ext1.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"cells": [
{
"cell_type": "markdown",
Expand All @@ -24,30 +16,30 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install python-arango\n",
"!pip install -i https://test.pypi.org/simple/ arangopipe\n",
"!pip install arangopipe==0.0.6.8.6\n",
"!pip install pandas PyYAML==5.1.1 sklearn2\n",
"!pip install jsonpickle\n",
"!pip install seaborn\n",
"!pip install dtreeviz\n"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"data_url = \"https://raw.githubusercontent.com/arangoml/arangopipe/arangopipe_examples/examples/data/cal_housing.csv\"\n",
"df = pd.read_csv(data_url, error_bad_lines=False)\n",
"df.head()"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
Expand All @@ -69,7 +61,9 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import linear_model\n",
"df['medianHouseValue'] = df['medianHouseValue'].apply(np.log)\n",
Expand Down Expand Up @@ -98,22 +92,20 @@
" \n",
" if index % 100 == 0:\n",
" print('Completed estimating %4d points in the dataset' % (index))"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Xm = df[preds].values\n",
"Ym = df['medianHouseValue'].values\n",
"clf_0 = linear_model.Lasso(alpha=0.001, max_iter = 10000)\n",
"clf_0.fit(Xm, Ym)\n",
"Yhat_m = clf_0.predict(Xm)"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
Expand All @@ -124,15 +116,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# see section 2.2 from https://www.stat.cmu.edu/~cshalizi/402/lectures/08-bootstrap/lecture-08.pdf\n",
"# see https://web.engr.oregonstate.edu/~tgd/classes/534/slides/part9.pdf\n",
"Expval_at_i = { i : np.mean(np.array(bootstrap_Yest[i])) for i in range(df.shape[0])}\n",
"bias_at_i = {i : Expval_at_i[i] - Yhat_m[i] for i in range(df.shape[0])}\n"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
Expand All @@ -143,7 +135,9 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
Expand All @@ -152,9 +146,7 @@
"bias_values = [bias for (pt, bias) in bias_at_i.items()]\n",
"sns.kdeplot(bias_values)\n",
"plt.grid(True)"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
Expand All @@ -168,14 +160,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"cluster_labels = KMeans(n_clusters=5, random_state=0).fit_predict(Xm)\n",
"cluster_labels.shape"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "markdown",
Expand All @@ -186,19 +178,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_bias = pd.DataFrame(Xm)\n",
"df_bias['cluster'] = cluster_labels\n",
"df_bias['bias'] = bias_values\n",
"df_bias.groupby('cluster')['bias'].agg([np.mean, np.size])"
],
"execution_count": null,
"outputs": []
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from arangopipe.arangopipe_storage.arangopipe_api import ArangoPipe\n",
"from arangopipe.arangopipe_storage.arangopipe_admin_api import ArangoPipeAdmin\n",
Expand Down Expand Up @@ -250,9 +244,15 @@
" \"tag\": \"Housing-Price-Hyperopt-Experiment\",\\\n",
" \"project\": \"Housing Price Estimation Project\"}\n",
"ap.log_run(run_info)"
],
"execution_count": null,
"outputs": []
]
}
],
"metadata": {
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
]
},
"nbformat": 4,
"nbformat_minor": 1
}
Loading

0 comments on commit 4569b74

Please sign in to comment.