Skip to content

Commit

Permalink
addressed multicollinearity (bail type modeling)
Browse files Browse the repository at this point in the history
  • Loading branch information
wasilaq committed May 26, 2021
1 parent 350bc77 commit 62412ef
Showing 1 changed file with 209 additions and 24 deletions.
233 changes: 209 additions & 24 deletions analysis/additional/analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -1827,6 +1827,41 @@
"df['zipcode_clean'].nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Age Group"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"26 to 33 7170\n",
"18 to 25 6926\n",
"34 to 41 4681\n",
"42 to 49 2573\n",
"50 to 57 1725\n",
"58 to 64 755\n",
"senior 275\n",
"minor 135\n",
"Name: age_group, dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['age_group'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -1864,7 +1899,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -1962,7 +1997,7 @@
"3 assault Black Male "
]
},
"execution_count": 38,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1975,7 +2010,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 22,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -2016,13 +2051,13 @@
"computer offense 9\n",
"abuse of office 7\n",
"trade and commerce 6\n",
"bribery and corrupt influence 3\n",
"vehicle chop shop and illegally obtained and altered property 3\n",
"bribery and corrupt influence 3\n",
"human trafficking 1\n",
"Name: offense_type, dtype: int64"
]
},
"execution_count": 39,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2035,7 +2070,16 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"model_df = model_df.loc[model_df['offense_type'] != 'NA']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -2051,7 +2095,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -2060,7 +2104,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -2230,7 +2274,7 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 52 columns</p>\n",
"<p>5 rows × 51 columns</p>\n",
"</div>"
],
"text/plain": [
Expand Down Expand Up @@ -2304,10 +2348,10 @@
"2 0 \n",
"3 0 \n",
"\n",
"[5 rows x 52 columns]"
"[5 rows x 51 columns]"
]
},
"execution_count": 42,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2325,7 +2369,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 27,
"metadata": {},
"outputs": [
{
Expand All @@ -2348,14 +2392,13 @@
"age_group_50 to 57 inf\n",
"age_group_58 to 64 inf\n",
"age_group_senior inf\n",
"attorney_type_Private 12.709588\n",
"attorney_type_Public 12.747043\n",
"race_Asian/Pacific Islander 53.207185\n",
"race_Black 870.497495\n",
"race_Native American/Alaskan Native 4.534703\n",
"race_White 847.593477\n",
"attorney_type_Private 12.681540\n",
"attorney_type_Public 12.719864\n",
"race_Asian/Pacific Islander 53.140721\n",
"race_Black 868.079539\n",
"race_Native American/Alaskan Native 4.333754\n",
"race_White 845.257252\n",
"sex_Male 0.000000\n",
"offense_type_NA inf\n",
"offense_type_abuse of office inf\n",
"offense_type_accidents report inf\n",
"offense_type_arrest prior to requisition inf\n",
Expand Down Expand Up @@ -2395,7 +2438,7 @@
"dtype: float64"
]
},
"execution_count": 43,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2411,12 +2454,154 @@
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"age_group_18 to 25 22.851712\n",
"age_group_26 to 33 22.383033\n",
"age_group_34 to 41 16.114302\n",
"age_group_42 to 49 10.089745\n",
"age_group_50 to 57 7.351366\n",
"age_group_58 to 64 3.839864\n",
"attorney_type_Public 1.095947\n",
"race_Asian/Pacific Islander 1.062328\n",
"race_Black 1.071807\n",
"race_Native American/Alaskan Native 1.003080\n",
"sex_Male 8.463943\n",
"offense_type_accidents report 8.523330\n",
"offense_type_arrest prior to requisition 13.035113\n",
"offense_type_arson, criminal mischief, and other property destruction 67.627804\n",
"offense_type_assault 469.740732\n",
"offense_type_burglary and other criminal intrusion 122.281379\n",
"offense_type_crimes against unborn child 1.375221\n",
"offense_type_criminal homicide 7.736998\n",
"offense_type_domestic relations and abuse 29.976033\n",
"offense_type_driving after imbibing alcohol or utilizing drugs 63.861088\n",
"offense_type_drug and substance 330.438847\n",
"offense_type_falsification and intimidation 30.504301\n",
"offense_type_firearms and other dangerous articles 294.222911\n",
"offense_type_forgery and fraudulent practices 19.094108\n",
"offense_type_general traffic offense 36.304159\n",
"offense_type_inchoate crimes 257.840837\n",
"offense_type_kidnapping 19.373813\n",
"offense_type_minors 28.515375\n",
"offense_type_nuisances 1.500305\n",
"offense_type_obstructing governmental operations 28.054994\n",
"offense_type_offenses against the family 15.543443\n",
"offense_type_other offenses 8.438834\n",
"offense_type_public indecency 3.998514\n",
"offense_type_public utilities 1.333759\n",
"offense_type_riot, disorderly conduct and related offenses 12.166019\n",
"offense_type_robbery 41.812707\n",
"offense_type_serious traffic offense 15.013599\n",
"offense_type_sexual offenses 60.116388\n",
"offense_type_theft and related offenses 257.417905\n",
"offense_type_unknown statute 4.038703\n",
"offense_type_vehicles: lighting equipment 2.833856\n",
"offense_type_vehicles: other required equipment 3.541587\n",
"dtype: float64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# drop additional columns, drop all offense types with less than 10 instances\n",
"\n",
"colinear_features = ['race_White', 'attorney_type_Private', 'age_group_senior', 'offense_type_bribery and corrupt influence', 'offense_type_vehicle chop shop and illegally obtained and altered property', 'offense_type_trade and commerce', 'offense_type_abuse of office', 'offense_type_computer offense']\n",
"\n",
"vif_df = add_constant(model_df.drop(columns = ['bail_type'] + colinear_features))\n",
"\n",
"pd.Series(\n",
" [\n",
" variance_inflation_factor(vif_df.values, i) for i in range(vif_df.shape[1])\n",
" ], index = vif_df.columns\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"age_group_26 to 33 1.369731\n",
"age_group_34 to 41 1.334875\n",
"age_group_42 to 49 1.223995\n",
"age_group_50 to 57 1.160869\n",
"age_group_58 to 64 1.072877\n",
"attorney_type_Public 1.094832\n",
"race_Asian/Pacific Islander 1.062285\n",
"race_Black 1.071365\n",
"race_Native American/Alaskan Native 1.003062\n",
"sex_Male 0.035458\n",
"offense_type_accidents report 1.009558\n",
"offense_type_arrest prior to requisition 1.016067\n",
"offense_type_arson, criminal mischief, and other property destruction 1.085409\n",
"offense_type_burglary and other criminal intrusion 1.153047\n",
"offense_type_crimes against unborn child 1.000652\n",
"offense_type_criminal homicide 1.011980\n",
"offense_type_domestic relations and abuse 1.039389\n",
"offense_type_driving after imbibing alcohol or utilizing drugs 1.080861\n",
"offense_type_drug and substance 1.390079\n",
"offense_type_falsification and intimidation 1.038141\n",
"offense_type_firearms and other dangerous articles 1.404335\n",
"offense_type_forgery and fraudulent practices 1.028534\n",
"offense_type_general traffic offense 1.044046\n",
"offense_type_inchoate crimes 1.309098\n",
"offense_type_kidnapping 1.023104\n",
"offense_type_minors 1.065659\n",
"offense_type_nuisances 1.000755\n",
"offense_type_obstructing governmental operations 1.034197\n",
"offense_type_offenses against the family 1.019124\n",
"offense_type_other offenses 1.011732\n",
"offense_type_public indecency 1.004056\n",
"offense_type_public utilities 1.000538\n",
"offense_type_riot, disorderly conduct and related offenses 1.014304\n",
"offense_type_robbery 1.050323\n",
"offense_type_serious traffic offense 1.018132\n",
"offense_type_sexual offenses 1.078961\n",
"offense_type_theft and related offenses 1.312264\n",
"offense_type_unknown statute 1.004109\n",
"offense_type_vehicles: lighting equipment 1.002502\n",
"offense_type_vehicles: other required equipment 1.003910\n",
"dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Unable to interpret linear models with the above features due to high multicolliniearity.\n",
"# drop additional columns\n",
"\n",
"colinear_features += ['age_group_18 to 25', 'offense_type_assault']\n",
"\n",
"Consider: modeling offense type against these features (in particular, is race linked to the offense type an individual is charged with? - if so, why?)"
"vif_df = add_constant(model_df.drop(columns = ['bail_type'] + colinear_features))\n",
"\n",
"pd.Series(\n",
" [\n",
" variance_inflation_factor(vif_df.values, i) for i in range(vif_df.shape[1])\n",
" ], index = vif_df.columns\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_df.drop(columns = colinear_features, inplace = True)"
]
},
{
Expand Down

0 comments on commit 62412ef

Please sign in to comment.