Skip to content

Commit

Permalink
Features selection
Browse files: browse the repository at this point in the history
  • Loading branch information
hwait committed Jan 28, 2021
1 parent 0e87deb commit 848b72d
Show file tree
Hide file tree
Showing 7 changed files with 470 additions and 315 deletions.
Binary file modified api/__pycache__/predictions_converter.cpython-38.pyc
Binary file not shown.
20 changes: 12 additions & 8 deletions api/predictions_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ def make_df(self, threshold=0.5):
df_preds['pred_draw']=np.where(df_preds['prob_draw']>threshold,1,0)
df_preds['pred_away']=np.where(df_preds['prob_away']>threshold,1,0)
df_preds=df_preds[(df_preds['pred_home']==1) | (df_preds['pred_draw']==1) |(df_preds['pred_away']==1)]
df_preds['winner_home']=df_preds['winner_home'].astype(int)
df_preds['winner_draw']=df_preds['winner_draw'].astype(int)
df_preds['winner_away']=df_preds['winner_away'].astype(int)
df_preds['pred_home']=df_preds['pred_home'].astype(int)
df_preds['pred_draw']=df_preds['pred_draw'].astype(int)
df_preds['pred_away']=df_preds['pred_away'].astype(int)
try:
df_preds['winner_home']=df_preds['winner_home'].astype(int)
df_preds['winner_draw']=df_preds['winner_draw'].astype(int)
df_preds['winner_away']=df_preds['winner_away'].astype(int)
df_preds['pred_home']=df_preds['pred_home'].astype(int)
df_preds['pred_draw']=df_preds['pred_draw'].astype(int)
df_preds['pred_away']=df_preds['pred_away'].astype(int)
except:
display(df_preds)
df_preds['win']=0
df_preds.loc[(df_preds['winner_home']==df_preds['pred_home']) & (df_preds['winner_home']==1),'win']=1
df_preds.loc[(df_preds['winner_draw']==df_preds['pred_draw']) & (df_preds['winner_draw']==1),'win']=1
Expand All @@ -56,7 +59,7 @@ def make_df(self, threshold=0.5):
self.DF=df_preds[['ds', 'country', 'liga', 't1', 't2', 'sc1', 'sc2', 'odds_home', 'odds_draw', 'odds_away','winner_home', 'winner_draw', 'winner_away','pred_home','pred_draw','pred_away','prob_home', 'prob_draw', 'prob_away','win','prf']] if self.ODDS else df_preds[['ds', 'country', 'liga', 't1', 't2', 'sc1', 'sc2', 'winner_home', 'winner_draw', 'winner_away','pred_home','pred_draw','pred_away','prob_home', 'prob_draw', 'prob_away','win']]

def performance_metrics(self):
display(api.util.get_performance_metrics(self.Y, self.YHAT, self.CLASSES))
return api.util.get_performance_metrics(self.Y, self.YHAT, self.CLASSES)

def graph(self,mode='tpfp'):
if mode == 'tpfp':
Expand All @@ -66,4 +69,5 @@ def graph(self,mode='tpfp'):

def profit(self):
df_=self.DF.loc[self.DF['odds_home']>0]
print('WAG:{}; ACC: {}; PRF: {}; ROI: {}'.format(df_.shape[0],df_.win.mean(), df_.prf.sum(), df_.prf.sum()/df_.shape[0]))
print('WAG:{}; ACC: {}; PRF: {}; ROI: {}'.format(df_.shape[0],df_.win.mean(), df_.prf.sum(), df_.prf.sum()/df_.shape[0]))
return df_.shape[0], df_.win.mean(), df_.prf.sum(), df_.prf.sum()/df_.shape[0]
116 changes: 54 additions & 62 deletions data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n %reload_ext autoreload\n"
]
}
],
"outputs": [],
"source": [
"import glob\n",
"import pandas as pd\n",
Expand All @@ -63,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -195,15 +187,15 @@
" df['wx']=np.where(df.winner=='draw',1,0)\n",
" df['w2']=np.where(df.winner=='away',1,0)\n",
" df_home=df.copy()\n",
" df_home=df_home.rename(columns={'homeScoreHT':'ht1','awayScoreHT':'ht2','sc1':'ft1','sc2':'ft2','vote_home':'vote1','vote_draw':'votex','vote_away':'vote2','home_formation':'form1','away_formation':'form2'})\n",
" df_home=df_home.rename(columns={'homeScoreHT':'ht1','awayScoreHT':'ht2','sc1':'ft1','sc2':'ft2','vote_home':'vote1','vote_draw':'votex','vote_away':'vote2','home_formation':'form1','away_formation':'form2','oddsprob_home':'oddsprob1','oddsprob_draw':'oddsprobx','oddsprob_away':'oddsprob2','drift_home':'drift1','drift_draw':'driftx','drift_away':'drift2'})\n",
" if double:\n",
" df_home['side']=1\n",
" df_away=df.copy()\n",
" df_away['side']=0\n",
" df_away=df_away.rename(columns={'homeScoreHT':'ht2','awayScoreHT':'ht1','sc1':'ft2','sc2':'ft1','vote_home':'vote2','vote_draw':'votex','vote_away':'vote1',\n",
" 'home_formation':'form2','away_formation':'form1','w1':'w2','w2':'w1','elo1':'elo2','elo2':'elo1','t1':'t2','t2':'t1',\n",
" 'tid1':'tid2','tid2':'tid1','odds_away':'odds_home','odds_home':'odds_away','oddsprob_home':'oddsprob_away',\n",
" 'oddsprob_away':'oddsprob_home','drift_home':'drift_away','drift_away':'drift_home',\n",
" 'tid1':'tid2','tid2':'tid1','odds_away':'odds_home','odds_home':'odds_away','oddsprob1':'oddsprob2',\n",
" 'oddsprob2':'oddsprob1','drift1':'drift2','drift2':'drift1',\n",
" 'possession1':'possession2', 'shont1':'shont2', 'shofft1':'shofft2', 'corners1':'corners2', \n",
" 'offsides1':'offsides2', 'fouls1':'fouls2', 'cards1':'cards2', 'gksaves1':'gksaves2',\n",
" 'possession2':'possession1', 'shont2':'shont1', 'shofft2':'shofft1', 'corners2':'corners1', \n",
Expand All @@ -226,7 +218,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -252,17 +244,17 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#['side', 'country', 'country_id', 'liga', 'mid', 'round', 'ds', 'de', 'tid1', 'tid2', 't1', 't2', 'w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', \n",
"# 'winner', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']\n",
"COL_CUR=['country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']\n",
"COL_CUR=['country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2','oddsprob1','oddsprobx','oddsprob2','drift1','drift2','driftx']\n",
"COL_PREV=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'psht', 'psft', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']\n",
"COL_CAT=['country_id','form1', 'form2', 'pop_r']\n",
"COL_BIN=['side']\n",
"COL_NUM=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'psht', 'psft', 'vote1', 'votex', 'vote2', 'elo1', 'elo2','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']\n",
"COL_NUM=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'psht', 'psft', 'vote1', 'votex', 'vote2', 'elo1', 'elo2','oddsprob1','oddsprobx','oddsprob2','drift1','driftx','drift2', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']\n",
"COL_INF=['side','country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2', 'ft1', 'ft2','winner','odds_away','odds_draw','odds_home']\n",
"\n",
"#df=pd.read_csv('data/matches.csv', index_col=None)\n",
Expand All @@ -272,7 +264,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -345,7 +337,7 @@
],
"cell_type": "code",
"metadata": {},
"execution_count": 35,
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
Expand All @@ -365,12 +357,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -405,12 +397,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -445,12 +437,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -485,12 +477,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -525,12 +517,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -565,12 +557,12 @@
"vote2\n",
"elo1\n",
"elo2\n",
"oddsprob_home\n",
"oddsprob_draw\n",
"oddsprob_away\n",
"drift_home\n",
"drift_away\n",
"drift_draw\n",
"oddsprob1\n",
"oddsprobx\n",
"oddsprob2\n",
"drift1\n",
"driftx\n",
"drift2\n",
"graph1\n",
"graph2\n",
"possession1\n",
Expand Down Expand Up @@ -688,18 +680,18 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"208"
"0"
]
},
"metadata": {},
"execution_count": 38
"execution_count": 15
}
],
"source": [
Expand Down Expand Up @@ -791,7 +783,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -846,13 +838,13 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"df['diff_vote12']=df['vote1']-df_['vote2']\n",
"df['diff_elo']=df['elo1']-df_['elo2']\n",
"df['diff_op']=df['oddsprob_home']-df_['oddsprob_away']\n",
"df['diff_op']=df['oddsprob1']-df_['oddsprob2']\n",
"df.to_csv('data/stats_generated.csv', index=False)"
]
},
Expand Down
Loading

0 comments on commit 848b72d

Please sign in to comment.