0:
- players+=self._pl(mm,match['tid2'])
- m=re.search(pKeeper, keeper)
- if m:
- goalkeepers.append(self._gk(m,match['tid2']))
+ # html=re.sub(pSpan, '', html)
+ # # Team 1
+ # tstats=self._inbetween(html,'all_stats_'+match['tid1'],'all_stats_'+match['tid2'])
+
+ # keeper_str='
RESULTS','')
- m=re.search(pCaption, html)
- caption='' if not m else m.groups()[0]
- _,_,country,liga,_=caption.split('/')
- pSeason=r'
([^<]+) '
- m=re.search(pSeason, html)
- season='' if not m else m.groups()[0]
+ #m=re.search(pCaption, html)
+ #caption='' if not m else m.groups()[0]
+ #_,_,country,liga,_=caption.split('/')
+ #pSeason=r'
([^<]+) '
+ #m=re.search(pSeason, html)
+ #season='' if not m else m.groups()[0]
html=html.replace('
','').replace(' ','')
- print(file,country,liga,season)
+ #print(file,country,liga,season)
mm=re.findall(pMatch, html)
if len(mm)>0:
for x in mm:
t,link,name,result, odds1, oddsdraw, odds2, bn=x
+ _,_,country,liga,_,_=link.split('/')
t1,t2=name.split(' - ')
scores=result.split(':')
if len(scores)<2:
continue
sc1,sc2=result.split(':')
- self.DATA.append({
+ self.DATALIST.append({
'ds':datetime.utcfromtimestamp(int(t)),
'country':country,
'liga':liga,
- 'season':season,
+ 'season':'2020/2021',
't1':t1,
't2':t2,
'sc1':sc1,
@@ -74,20 +90,21 @@ def parse_days(self):
move(self.DAYS_RAW_PATH+file,self.DAYS_RAW_PATH_OUT+file)
#break
- df=pd.DataFrame(self.DATA)
+ df=pd.DataFrame(self.DATALIST)
df['done']=0
- self._append_save(df, self.DATA_PATH+'matches.csv')
+ self._append_save_matches(df, self.DATA_PATH+'matches.csv')
def parse_matches(self):
# 0 - w1
# 1 - x
# 2 - w2
files=listdir(self.MATCHES_RAW_PATH)
- for file in files:
+ for file in tqdm(files):
if file=='.empty':
continue
with open(self.MATCHES_RAW_PATH+file, 'r', encoding='utf8') as f:
html=f.read()
+ print(file)
js=json.loads(html)
mid=file.replace('.json','')
odds=js['d']['oddsdata']['back']['E-1-2-0-0-0']['odds']
@@ -100,34 +117,40 @@ def parse_matches(self):
bookies={}
for x in odds:
- bookies[x]={
- 'mid':mid,
- 'bid':x,
- 'w1':odds[x]['0'],
- 'wx':odds[x]['1'],
- 'w2':odds[x]['2']
- }
+ if len(odds[x])==3:
+ bookies[x]={
+ 'mid':mid,
+ 'bid':x,
+ 'w1':odds[x]['0'] if '0' in odds[x] else odds[x][0],
+ 'wx':odds[x]['1'] if '1' in odds[x] else odds[x][1],
+ 'w2':odds[x]['2'] if '2' in odds[x] else odds[x][2]
+ }
for x in movement:
- bookies[x]['move_1']=str(movement[x]['0'])[0].upper()
- bookies[x]['move_x']=str(movement[x]['1'])[0].upper()
- bookies[x]['move_2']=str(movement[x]['2'])[0].upper()
+ if len(movement[x])==3:
+ bookies[x]['move_1']=str(movement[x]['0'])[0].upper() if '0' in movement[x] else 'N' if movement[x][0]==None else movement[x][0][0].upper()
+ bookies[x]['move_x']=str(movement[x]['1'])[0].upper() if '1' in movement[x] else 'N' if movement[x][1]==None else movement[x][1][0].upper()
+ bookies[x]['move_2']=str(movement[x]['2'])[0].upper() if '2' in movement[x] else 'N' if movement[x][2]==None else movement[x][2][0].upper()
for x in opening_odds:
- bookies[x]['open_1']=opening_odds[x]['0']
- bookies[x]['open_x']=opening_odds[x]['1']
- bookies[x]['open_2']=opening_odds[x]['2']
+ if len(opening_odds[x])==3:
+ bookies[x]['open_1']=opening_odds[x]['0'] if '0' in opening_odds[x] else opening_odds[x][0]
+ bookies[x]['open_x']=opening_odds[x]['1'] if '1' in opening_odds[x] else opening_odds[x][1]
+ bookies[x]['open_2']=opening_odds[x]['2'] if '2' in opening_odds[x] else opening_odds[x][2]
for x in opening_change_time:
- if opening_change_time[x]['0'] and opening_change_time[x]['1'] and opening_change_time[x]['2']:
- bookies[x]['time_open']=max([opening_change_time[x]['0'],opening_change_time[x]['1'],opening_change_time[x]['2']])
+ if len(opening_change_time[x])>0 and x in bookies:
+ bookies[x]['time_open']=opening_change_time[x]['0'] if '0' in opening_change_time[x] else opening_change_time[x][0]
for x in change_time:
- bookies[x]['time_close']=max([change_time[x]['0'],change_time[x]['1'],change_time[x]['2']])
- #move(self.MATCHES_RAW_PATH+file,self.MATCHES_RAW_PATH_OUT+file)
- self.DATA.append(pd.DataFrame([bookies[x] for x in bookies]))
- break
-
- df=pd.concat(self.DATA, axis=0)
- self._append_save(df, self.DATA_PATH+'odds.csv')
+ if len(change_time[x])==3:
+ bookies[x]['time_close']=max([change_time[x]['0'],change_time[x]['1'],change_time[x]['2']]) if '0' in change_time[x] else max([change_time[x][0],change_time[x][1],change_time[x][2]])
+ move(self.MATCHES_RAW_PATH+file,self.MATCHES_RAW_PATH_OUT+file)
+ self.DATALIST.append(pd.DataFrame([bookies[x] for x in bookies]))
+ #print(self.DATALIST)
+ #break
+ #print(self.DATALIST)
+ df=pd.concat(self.DATALIST, axis=0)
+
+ self._append_save_odds(df, self.DATA_PATH+'odds.csv')
diff --git a/api/predictions_converter.py b/api/predictions_converter.py
new file mode 100644
index 0000000..41b98de
--- /dev/null
+++ b/api/predictions_converter.py
@@ -0,0 +1,69 @@
+import pandas as pd
+import numpy as np
+from IPython.display import display
+
+import api.util
+
+class PredictionsConverter:
+ """Join predicted probabilities, true labels and match info into one table and report on it.
+
+ NOTE(review): leading indentation is not visible in this view, so the nesting of a few
+ statements below (flagged individually) could not be confirmed.
+ """
+ def __init__(self, provider, yhat, y, info, odds=True):
+ """Store the inputs; `info` is copied, `yhat` and `y` are kept by reference."""
+ # Outcome order used throughout the class: home, draw, away.
+ self.CLASSES=['HOME','DRAW','AWAY']
+ # Not read by any method of this class.
+ self.DATA_PATH=f'predictions/{provider}/'
+ self.LABELS_PREDICTED=yhat
+ self.LABELS=y
+ self.INFO=info.copy()
+ # When true, make_df also derives the 'odds' and 'prf' columns from odds_home/odds_draw/odds_away.
+ self.ODDS=odds
+
+ def make_df(self, threshold=0.5):
+ """Build the predictions table and store it as self.DF, with self.Y / self.YHAT as arrays.
+
+ threshold: a probability cut-off (an outcome is predicted when its probability is
+ greater than it), or the string 'max' to predict the top-ranked outcome of each row.
+ Returns nothing.
+ """
+ df_yhat=pd.DataFrame(data=self.LABELS_PREDICTED, columns=['prob_home', 'prob_draw', 'prob_away'])
+ df_y=pd.DataFrame(data=self.LABELS, columns=['winner_home', 'winner_draw', 'winner_away'])
+ # Drop INFO's index so the three frames line up by position in the concat.
+ df_i=self.INFO.reset_index(drop=True)
+ df_preds=pd.concat([df_i,df_y,df_yhat], axis=1)
+ if threshold=='max':
+ # Rank the three probabilities within each row; rank > 2 marks the highest one.
+ # With method='max' tied values share the highest rank, so tied maxima are all marked.
+ a=df_yhat.rank(method='max', axis=1)
+ df_preds['pred_home']=a['prob_home'].apply(lambda x: 1 if x>2 else 0)
+ df_preds['pred_draw']=a['prob_draw'].apply(lambda x: 1 if x>2 else 0)
+ df_preds['pred_away']=a['prob_away'].apply(lambda x: 1 if x>2 else 0)
+ else:
+ df_preds['pred_home']=np.where(df_preds['prob_home']>threshold,1,0)
+ df_preds['pred_draw']=np.where(df_preds['prob_draw']>threshold,1,0)
+ df_preds['pred_away']=np.where(df_preds['prob_away']>threshold,1,0)
+ # Keep only rows where at least one outcome is predicted.
+ # NOTE(review): cannot tell from here whether this filter sits inside the else branch.
+ df_preds=df_preds[(df_preds['pred_home']==1) | (df_preds['pred_draw']==1) |(df_preds['pred_away']==1)]
+ df_preds['winner_home']=df_preds['winner_home'].astype(int)
+ df_preds['winner_draw']=df_preds['winner_draw'].astype(int)
+ df_preds['winner_away']=df_preds['winner_away'].astype(int)
+ df_preds['pred_home']=df_preds['pred_home'].astype(int)
+ df_preds['pred_draw']=df_preds['pred_draw'].astype(int)
+ df_preds['pred_away']=df_preds['pred_away'].astype(int)
+ # win = 1 when the outcome that actually happened is one of the predicted outcomes.
+ df_preds['win']=0
+ df_preds.loc[(df_preds['winner_home']==df_preds['pred_home']) & (df_preds['winner_home']==1),'win']=1
+ df_preds.loc[(df_preds['winner_draw']==df_preds['pred_draw']) & (df_preds['winner_draw']==1),'win']=1
+ df_preds.loc[(df_preds['winner_away']==df_preds['pred_away']) & (df_preds['winner_away']==1),'win']=1
+ if self.ODDS:
+ # 'odds' = odds of the predicted outcome. If several outcomes are predicted in one row,
+ # the later assignment overwrites the earlier one (away over draw over home).
+ df_preds.loc[df_preds['pred_home']==1,'odds']=df_preds['odds_home']
+ df_preds.loc[df_preds['pred_draw']==1,'odds']=df_preds['odds_draw']
+ df_preds.loc[df_preds['pred_away']==1,'odds']=df_preds['odds_away']
+ # 'prf' (profit per unit stake): -1 for a lost bet, 0 where odds are 0.
+ df_preds.loc[df_preds['win']==0,'prf']=-1
+ df_preds.loc[df_preds['odds']==0,'prf']=0
+
+ # A won bet pays odds-1; this overrides the two assignments above for winning rows.
+ # NOTE(review): reads 'odds' and 'prf', which are only created under self.ODDS, so this line
+ # is presumably inside that if block — confirm against the real indentation.
+ df_preds['prf']=np.where(df_preds.win>0,df_preds.odds-1, df_preds['prf'])
+ df_preds = df_preds.drop_duplicates()
+ #df_preds = df_preds.rename(columns={'homeTeamShort': 't1','awayTeamShort': 't2','tournament': 'liga','ts': 'ds','homeScoreFT': 'sc1','awayScoreFT': 'sc2'})
+ self.Y=df_preds[['winner_home','winner_draw','winner_away']].values
+ self.YHAT=df_preds[['pred_home','pred_draw','pred_away']].values
+
+ # homeTeamShort awayTeamShort tournament ts homeScoreFT awayScoreFT
+ #"['sc1', 'sc2', 't2', 'liga', 't1', 'ds'] not in index"
+ # Final column selection; INFO must supply ds/country/liga/t1/t2/sc1/sc2 (plus odds_* when ODDS is true).
+ # Without ODDS the odds_* and 'prf' columns are left out.
+ self.DF=df_preds[['ds', 'country', 'liga', 't1', 't2', 'sc1', 'sc2', 'odds_home', 'odds_draw', 'odds_away','winner_home', 'winner_draw', 'winner_away','pred_home','pred_draw','pred_away','prob_home', 'prob_draw', 'prob_away','win','prf']] if self.ODDS else df_preds[['ds', 'country', 'liga', 't1', 't2', 'sc1', 'sc2', 'winner_home', 'winner_draw', 'winner_away','pred_home','pred_draw','pred_away','prob_home', 'prob_draw', 'prob_away','win']]
+
+ def performance_metrics(self):
+ """Display the per-class metrics table computed from self.Y and self.YHAT (requires make_df first)."""
+ display(api.util.get_performance_metrics(self.Y, self.YHAT, self.CLASSES))
+
+ def graph(self,mode='tpfp'):
+ """Plot curves via api.util.get_curve: its default curve for 'tpfp', 'prc' for 'prc'; other modes do nothing."""
+ if mode == 'tpfp':
+ api.util.get_curve(self.Y, self.YHAT, self.CLASSES)
+ elif mode== 'prc':
+ api.util.get_curve(self.Y, self.YHAT, self.CLASSES, curve='prc')
+
+ def profit(self):
+ """Print bet count (WAG), hit rate (ACC), summed profit (PRF) and profit per bet (ROI)."""
+ # NOTE(review): reads 'odds_home' and 'prf', which self.DF only contains when self.ODDS is true;
+ # the ROI term divides by the row count, which is zero when no row has odds_home > 0.
+ df_=self.DF.loc[self.DF['odds_home']>0]
+ print('WAG:{}; ACC: {}; PRF: {}; ROI: {}'.format(df_.shape[0],df_.win.mean(), df_.prf.sum(), df_.prf.sum()/df_.shape[0]))
\ No newline at end of file
diff --git a/api/sofa_dp.py b/api/sofa_dp.py
new file mode 100644
index 0000000..1dbb027
--- /dev/null
+++ b/api/sofa_dp.py
@@ -0,0 +1,216 @@
+import os
+import pandas as pd
+import numpy as np
+import pickle
+import api.util
+from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler
+
+class SofaDataProvider:
+ """Load the CSV files under data/sofa/, encode them and return model-ready arrays.
+
+ NOTE(review): leading indentation is not visible in this view, so the nesting of a few
+ statements below (flagged individually) could not be confirmed.
+ """
+ def __init__(self, include=[],exclude=[], load=False):
+ """Store the column filters and initialise paths and the (empty) column registries.
+
+ include / exclude: column-name lists consulted by _ff and _encode.
+ load: when true, fitted encoders and vote intervals are unpickled instead of being fitted.
+ """
+ # NOTE(review): the list defaults are shared between calls; the visible code only reads them.
+ self.LOCAL_TZ = 'Asia/Almaty'
+ self.SERVER_TZ = 'UTC'
+ self.DATA_PATH='data/sofa/'
+ self.PREREQUISITES_PATH='prerequisites/sofa/'
+ self.INCLUDE=include
+ self.EXCLUDE=exclude
+ # Column registries, filled in by the _provide_* methods: categorical, numeric, label, info.
+ self.COL_CAT=[]
+ self.COL_NUM=[]
+ self.COL_LBL=[]
+ self.COL_INF=[]
+ self.LOAD=load
+
+ def _load_prerequisites(self,name):
+ """Unpickle and return the object stored as PREREQUISITES_PATH/name."""
+ # NOTE(review): pickle.load can execute arbitrary code; only use on files this project wrote.
+ with open(os.path.join(self.PREREQUISITES_PATH, name),'rb') as f:
+ encoder = pickle.load(f)
+ return encoder
+
+ def _save_prerequisite(self, name, data):
+ """Pickle `data` to PREREQUISITES_PATH/name, creating the directory if needed."""
+ # `folder` is assigned but never used.
+ folder='prerequisites/'
+ os.makedirs(self.PREREQUISITES_PATH, mode=0o777, exist_ok=True)
+ with open(os.path.join(self.PREREQUISITES_PATH, name), mode='wb') as f:
+ pickle.dump(data, f)
+
+ def _ff(self, columns):
+ """Filter `columns`: keep those in INCLUDE when it is non-empty, otherwise drop those in EXCLUDE."""
+ if len(self.INCLUDE)>0:
+ return [x for x in columns if x in self.INCLUDE]
+ else:
+ return [x for x in columns if x not in self.EXCLUDE]
+
+ def _encode_teams(self, df):
+ """Attach numeric team ids (tid1, tid2) to `df`, registering unseen team names in teams.csv."""
+ teams_name=self.DATA_PATH+'teams.csv'
+ teams_saved=pd.read_csv(teams_name, index_col=None)
+ # All distinct names appearing as home (t1) or away (t2) team.
+ teams=pd.concat([pd.DataFrame(df['t1'].unique(), columns=['name']),pd.DataFrame(df['t2'].unique(), columns=['name'])]).drop_duplicates()
+ teams_new=teams[~teams.name.isin(teams_saved.name)]
+ if not teams_new.empty:
+ print('New teams!')
+ # New ids continue after the largest saved id. Note: `id` shadows the builtin here.
+ id=teams_saved.id.max()+1
+ #id=0
+ teams_list=[]
+ for row in teams_new.itertuples():
+ # Names of length 0 or 1 are not registered.
+ if len(row.name)>1:
+ teams_list.append({'name':row.name, 'id':id})
+ id+=1
+ #break
+ teams_saved=pd.concat([teams_saved,pd.DataFrame(teams_list)])
+ teams_saved.to_csv(teams_name, index=False)
+ # Rename the lookup table twice so it can be merged once per side.
+ # NOTE(review): assumes teams.csv has exactly two columns ordered (name, id) — confirm.
+ teams_saved.columns=['t1','tid1']
+ df=df.merge(teams_saved, on='t1', how='left')
+ teams_saved.columns=['t2','tid2']
+ df=df.merge(teams_saved, on='t2', how='left')
+ return df
+
+
+
+ def _encode(self, enctype, features, outs, df):
+ """Fit (or load) an encoder for `features` and apply it to `df`.
+
+ enctype: 'sc' (MinMaxScaler), 'le' (LabelEncoder) or 'ohe' (OneHotEncoder).
+ features: one or two source column names; outs: the matching output column names.
+ Returns the dense one-hot array for 'ohe', otherwise `df` with the output columns set.
+ """
+ # Early exit: `df` is returned untouched when outs[0] is listed in INCLUDE or in EXCLUDE.
+ # NOTE(review): _ff keeps the columns that ARE in INCLUDE, so skipping them here looks
+ # inverted — confirm intent. This exit also returns a DataFrame even for enctype 'ohe'.
+ if (len(self.INCLUDE)>0 and outs[0] in self.INCLUDE) or outs[0] in self.EXCLUDE:
+ return df
+ # Joined feature names: used as the pickle file suffix and as a temporary column name.
+ name='_'.join(features)
+ if self.LOAD:
+ encoder=self._load_prerequisites(f'{enctype}_{name}')
+ else:
+ # Any other enctype leaves `encoder` unassigned.
+ if enctype=='sc':
+ encoder = MinMaxScaler()
+ elif enctype=='le':
+ encoder = LabelEncoder()
+ elif enctype=='ohe':
+ encoder = OneHotEncoder()
+ if len(features)==1:
+ encoder.fit(df[features].values)
+ else:
+ # Two features share one encoder, fitted on the unique values of both columns.
+ df1=pd.DataFrame(df[features[0]].unique(), columns=[name])
+ df2=pd.DataFrame(df[features[1]].unique(), columns=[name])
+ if enctype=='sc':
+ # Scaler: the two columns are placed side by side (axis=1).
+ encoder.fit(pd.concat([df1,df2], axis=1)[name])
+ else:
+ # Other encoders: the two columns are stacked into one.
+ encoder.fit(pd.concat([df1,df2])[name])
+ # NOTE(review): presumably only executed on the fit path (not when LOAD is set) — confirm.
+ self._save_prerequisite(f'{enctype}_{name}', encoder)
+ if enctype=='ohe':
+ return encoder.transform(df[features].values).toarray()
+ if len(features)==1:
+ df[outs[0]] = encoder.transform(df[features].values)
+ else:
+ if enctype=='sc':
+ df[outs] = encoder.transform(df[features])
+ else:
+ df[outs[0]] = encoder.transform(df[[features[0]]])
+ df[outs[1]] = encoder.transform(df[[features[1]]])
+ return df
+
+ def _provide_statistics(self):
+ """Return statistics.csv as a DataFrame (not called by _load_data)."""
+ df=pd.read_csv(self.DATA_PATH+'statistics.csv', index_col=False)
+ return df
+
+ def _provide_lineups(self):
+ """Return lineups.csv as a DataFrame (not called by _load_data)."""
+ df=pd.read_csv(self.DATA_PATH+'lineups.csv', index_col=False)
+ return df
+
+ def _provide_formations(self, df_src):
+ """Merge label-encoded home/away formations onto `df_src` by 'mid'; rows without one are dropped."""
+ self.COL_CAT+=['home_formation','away_formation']
+ df=pd.read_csv(self.DATA_PATH+'formations.csv', index_col=False)
+
+ df=self._encode('le', ['formation_h','formation_a'], ['home_formation','away_formation'], df)
+
+ df_src=df_src.merge(df, on='mid', how='left')
+ # The left merge leaves NaN for matches without formation data; drop them before the int cast.
+ df_src=df_src.dropna(subset=['home_formation'])
+ df_src['home_formation'] = df_src['home_formation'].astype(int)
+ df_src['away_formation'] = df_src['away_formation'].astype(int)
+ return df_src
+
+ def _provide_incidents(self):
+ """Return incidents.csv as a DataFrame (not called by _load_data)."""
+ df=pd.read_csv(self.DATA_PATH+'incidents.csv', index_col=False)
+ return df
+
+ def _provide_graph(self, df_src):
+ """Merge per-match sums of the graph value (graph1) and its negation (graph2), both scaled, onto `df_src`."""
+ df_graph=pd.read_csv(self.DATA_PATH+'graph.csv', index_col=False)
+ # Keep minutes 1..90 only.
+ df_graph=df_graph.loc[(df_graph['minute']>0) & (df_graph['minute']<91)]
+ # NOTE(review): assumes the file has exactly three columns in this order — confirm.
+ df_graph.columns=['mid','time','graph1']
+ df_graph=df_graph.drop_duplicates()
+ df_graph=df_graph.groupby('mid').graph1.sum().reset_index()
+ df_graph['graph2']=df_graph['graph1']*-1
+ df_graph=self._encode('sc', ['graph1','graph2'], ['graph1','graph2'], df_graph)
+ # graph1/graph2 are not added to COL_NUM here, so provide_data does not put them into `data`.
+ df_src=df_src.merge(df_graph, on='mid', how='left')
+ return df_src
+
+ def _provide_votes(self, df_src):
+ """Merge vote shares and a per-year popularity bucket (pop_r) onto `df_src`; rows without votes are dropped."""
+ self.COL_NUM+=['vote_home','vote_draw','vote_away']
+ self.COL_CAT+=['pop_r']
+ df=pd.read_csv(self.DATA_PATH+'votes.csv', index_col=False)
+ df=df.dropna()
+ # Turn raw vote counts into shares of the total.
+ df['votes']=df[['vote1','vote2','voteX']].sum(axis=1)
+ df['vote_home']=df['vote1']/df['votes']
+ df['vote_draw']=df['voteX']/df['votes']
+ df['vote_away']=df['vote2']/df['votes']
+ df=df[['mid','vote_home','vote_draw','vote_away','votes']]
+
+ df_src=df_src.merge(df, on='mid', how='left')
+ df_src=df_src.dropna(subset=['votes'])
+ df_src['y']=df_src.ds.dt.year
+
+ name='r_votes'
+ if self.LOAD:
+ intervals=self._load_prerequisites(name)
+ else:
+ intervals={}
+ # Five quantile bins of total votes per year, for the hard-coded years 2015..2021;
+ # only the bin edges are kept.
+ for y in range(2015,2022):
+ _,intervals[y]=pd.qcut(df_src[df_src.y==y].votes, 5, retbins=True, labels=False)
+ # NOTE(review): presumably only executed on the fit path (not when LOAD is set) — confirm.
+ self._save_prerequisite(name, intervals)
+
+ # pop_r = index of the vote bin within the match's own year.
+ for key in intervals:
+ df_src.loc[df_src.y==key, 'pop_r']=pd.cut(df_src[df_src.y==key]['votes'], bins=intervals[key], labels=False, include_lowest=True)
+ # NOTE(review): rows from a year without intervals would keep NaN in pop_r and fail this cast.
+ df_src.pop_r=df_src.pop_r.astype(int)
+ df_src.drop(columns=['votes','y'], inplace=True)
+ return df_src
+
+ def _provide_matches(self):
+ """Load matches_done.csv, normalise names and codes, encode country and teams, and register the column groups."""
+ info_colums=[ 'mid', 'ds', 'country', 'liga','tid1','tid2', 't1', 'homeScoreHT', 'sc1', 't2', 'awayScoreHT','sc2', 'winner']
+ cat_colums=['country_id', 'round']
+ label_colums=['winner']
+ self.COL_INF+=info_colums
+ self.COL_CAT+=cat_colums
+ self.COL_LBL+=label_colums
+ # Distinct column names returned by this method (np.unique also sorts them).
+ cols=np.unique(info_colums+cat_colums+label_colums)
+
+ # Character-by-character transliteration table (chars0[i] -> chars1[i]) applied to team names.
+ # A key listed twice keeps the replacement of its last occurrence (dict semantics).
+ chars0=['ó','é','í','ş','ã','İ','ğ','ç','ü','É','â','Ç','õ','ł','ą','Ś','ø','ń','ț','å','Å','ß', 'æ', 'Ž','ş', 'ə','Ö','ı','á','î','ñ','ö','ź','ú','è','Ł','ę','Ş','ä','ë','ô','ș','ū','č','Š','Þ','ė','Ä','ă','ì','š','i','ć','ň','ž','ư','ơ','ê','à','ð','ő','Ü','ý','ď','Á','ř','Č','Ú']
+ chars1=['o','e','i','s','a','I','g','c','u','E','a','C','o','l','a','s','o','n','t','a','A','ss','ae','Z','sh','a','O','i','a','i','n','o','z','u','e','L','e','S','a','e','o','s','u','c','S','P','e','A','a','i','s','i','c','n','z','u','o','e','a','d','o','U','y','d','A','r','C','U']
+ dicUnicode2En=dict(zip(chars0, chars1))
+
+ df_countries=pd.read_csv(self.DATA_PATH+'countries.csv', index_col=None)
+ df_countries['Name']=df_countries['Name'].str.lower()
+ # NOTE(review): assumes countries.csv has exactly two columns ordered (name, code) — confirm.
+ df_countries.columns=['country','countryCode']
+
+ df=pd.read_csv(self.DATA_PATH+'matches_done.csv', index_col=False)
+ df['round']=df['round'].fillna(0).astype(int)
+ df['ts']=pd.to_datetime(df['ts'])
+ # winnerCode 1 -> 'home', 2 -> 'away', anything else (including missing) -> 'draw'.
+ df['winner']=df['winnerCode'].apply(lambda x: 'home' if x==1.0 else 'away' if x==2.0 else 'draw')
+ df = df.rename(columns={'id': 'mid','tournament': 'liga','ts': 'ds','homeScoreFT': 'sc1','awayScoreFT': 'sc2'})
+ df=df.merge(df_countries, on='country', how='left')
+ # Country codes set explicitly for these names (overriding whatever the merge produced).
+ df.loc[df['country']=='england','countryCode']='GB'
+ df.loc[df['country']=='scotland','countryCode']='GB'
+ df.loc[df['country']=='czech-republic','countryCode']='CZ'
+ df.loc[df['country']=='russia','countryCode']='RU'
+ df.loc[df['country']=='usa','countryCode']='US'
+ # Team names: transliterate, strip everything except ASCII letters, digits and spaces, lower-case.
+ df['t1']=df['homeTeam'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()
+ df['t2']=df['awayTeam'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()
+ # Names that end up empty after cleaning are replaced by this fixed name
+ # (which, unlike every other name, is not lower-cased).
+ df.loc[df['t1']=='','t1']='AEK Athens'
+ df.loc[df['t2']=='','t2']='AEK Athens'
+
+ df=self._encode('le', ['country'], ['country_id'], df)
+ df=self._encode_teams(df)
+ return df[cols]
+
+ def _load_data(self):
+ """Run the provider chain: matches, then formations, graph and votes merged on top."""
+ df=self._provide_matches()
+ df=self._provide_formations(df)
+ df=self._provide_graph(df)
+ df=self._provide_votes(df)
+ return df
+
+ def provide_data(self):
+ """Return (data, labels, info, df).
+
+ data: the filtered numeric columns followed by the one-hot encoding of each filtered
+ categorical column; labels: one-hot encoding of COL_LBL; info: df[COL_INF]; df: the full frame.
+ """
+ df=self._load_data()
+ data=df[self._ff(self.COL_NUM)].values
+ for col in self._ff(self.COL_CAT):
+ data=np.hstack([data,self._encode('ohe', [col], [col], df)])
+
+ labels=self._encode('ohe', self.COL_LBL, self.COL_LBL, df)
+ info=df[self.COL_INF]
+ return data, labels, info, df
\ No newline at end of file
diff --git a/sofa_parser.py b/api/sofa_parser.py
similarity index 84%
rename from sofa_parser.py
rename to api/sofa_parser.py
index b7b9d98..078ceb3 100644
--- a/sofa_parser.py
+++ b/api/sofa_parser.py
@@ -166,43 +166,49 @@ def parse_matches(self):
name='votes'
file_name=self.DATA_PATH+name+'.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
name='graph'
file_name=self.DATA_PATH+name+'.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
name='incidents'
file_name=self.DATA_PATH+name+'.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
name='lineups'
file_name=self.DATA_PATH+name+'.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
name='formations'
- file_name=f'data/{name}.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ file_name=self.DATA_PATH+name+'.csv'
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
name='statistics'
- file_name=f'data/{name}.csv'
- if path.exists(file_name):
- pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
- else:
- pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
+ file_name=self.DATA_PATH+name+'.csv'
+ if len(arr[name])>0:
+ if path.exists(file_name):
+ pd.concat([pd.read_csv(file_name, index_col=None), pd.DataFrame(data=arr[name])]).to_csv(file_name, index=False)
+ else:
+ pd.DataFrame(data=arr[name]).to_csv(file_name, index=False)
diff --git a/api/time_series.py b/api/time_series.py
new file mode 100644
index 0000000..1f4dc1a
--- /dev/null
+++ b/api/time_series.py
@@ -0,0 +1,79 @@
+from tslearn.clustering import TimeSeriesKMeans
+from sklearn.metrics import silhouette_score, davies_bouldin_score
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from matplotlib import pyplot
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+import numpy as np
+
+def plot_cluster_tickers(current_cluster, to):
+    """Plot the series of a cluster on a four-column grid of subplots.
+
+    current_cluster: DataFrame with one series per row and an `eventId` field.
+    to: end position of the slice; positions 1..to-1 of each row are plotted.
+    At most twelve rows are drawn, although the grid is sized for all of them.
+    """
+    n_grid_rows = int(np.ceil(current_cluster.shape[0] / 4))
+    fig, axes = plt.subplots(n_grid_rows, 4, figsize=(15, 3 * n_grid_rows))
+    fig.autofmt_xdate(rotation=45)
+    axes = axes.reshape(-1)
+
+    for position, (_, series) in enumerate(current_cluster.iterrows()):
+        panel = axes[position]
+        panel.plot(series.iloc[1:to])
+        panel.set_title(f"{series.eventId}")
+        plt.xticks(rotation=45)
+        # Stop after the twelfth panel.
+        if position == 11:
+            break
+
+    plt.tight_layout()
+    plt.show()
+
+def find_kmeans(df_scaled, metric, clasters):
+    """Fit TimeSeriesKMeans for k = 1 .. clasters-1 and plot three model-selection curves.
+
+    Inertia is recorded for every k; silhouette and Davies-Bouldin scores are
+    only recorded for k > 1, so their curves start at k = 2.
+    """
+    candidate_ks = range(1, clasters)
+    inertias, silhouettes, db_scores = [], [], []
+    for n_clusters in tqdm(candidate_ks):
+        model = TimeSeriesKMeans(n_clusters=n_clusters, metric=metric, n_jobs=20, max_iter=10)
+        model.fit(df_scaled)
+        inertias.append(model.inertia_)
+        if n_clusters > 1:
+            silhouettes.append(silhouette_score(df_scaled, model.labels_))
+            db_scores.append(davies_bouldin_score(df_scaled, model.labels_))
+
+    # One figure per curve: (x values, y values, y-axis label, title).
+    panels = (
+        (candidate_ks, inertias, 'Distortion', 'Elbow Method'),
+        (candidate_ks[1:], silhouettes, 'Silhouette score', 'Silhouette'),
+        (candidate_ks[1:], db_scores, 'Davies-Bouldin score', 'Davies-Bouldin'),
+    )
+    for xs, ys, y_label, title in panels:
+        plt.figure(figsize=(10,4))
+        plt.plot(xs, ys, 'bx-')
+        plt.xlabel('k')
+        plt.ylabel(y_label)
+        plt.title(title)
+        plt.show()
+
+def calc_kmeans(df_scaled, metric, n_clusters, name):
+    """Return a fitted TimeSeriesKMeans model, cached on disk, and plot its centroids.
+
+    The model is stored as 'models/ts_{name}_{n_clusters}.pickle'. If that file
+    exists it is loaded instead of fitting again; otherwise the model is fitted
+    on `df_scaled` and written there.
+    """
+    # Imported here because this module's import block provides neither name;
+    # without them the function raised NameError on `path` / `pickle`.
+    import os
+    import pickle
+
+    file_name = 'models/ts_{}_{}.pickle'.format(name, n_clusters)
+    if not os.path.exists(file_name):
+        ts_kmeans = TimeSeriesKMeans(n_clusters=n_clusters, metric=metric, n_jobs=20, max_iter=10)
+        ts_kmeans.fit(df_scaled)
+        # Make sure the cache directory exists so the fit is not lost to a missing folder.
+        os.makedirs(os.path.dirname(file_name), exist_ok=True)
+        with open(file_name, 'wb') as f:
+            pickle.dump(ts_kmeans, f)
+    else:
+        # NOTE: unpickling can execute arbitrary code; only load cache files this project wrote.
+        # The context manager closes the handle, which the bare open() call used to leak.
+        with open(file_name, 'rb') as f:
+            ts_kmeans = pickle.load(f)
+
+    for cluster_number in range(n_clusters):
+        plt.plot(ts_kmeans.cluster_centers_[cluster_number, :, 0].T, label=cluster_number)
+    plt.title("Cluster centroids")
+    plt.legend()
+    plt.show()
+    return ts_kmeans
\ No newline at end of file
diff --git a/api/util.py b/api/util.py
new file mode 100644
index 0000000..5e02c10
--- /dev/null
+++ b/api/util.py
@@ -0,0 +1,146 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from sklearn.metrics import (
+ average_precision_score,
+ precision_recall_curve,
+ roc_auc_score,
+ roc_curve,f1_score
+)
+
+def odds2prob(df):
+    """Convert decimal odds to probabilities that sum to 1 per row.
+
+    The odds_away / odds_draw / odds_home columns of `df` are overwritten in
+    place and a 'margin' column (sum of the inverse odds) is added to it.
+    Returns the three converted columns of `df`.
+    """
+    outcome_cols = ['odds_away', 'odds_draw', 'odds_home']
+    # Inverse odds = implied probabilities including the bookmaker margin.
+    for col in outcome_cols:
+        df[col] = 1 / df[col]
+    df['margin'] = df[outcome_cols].sum(axis=1)
+    # Dividing by the row total removes the margin.
+    for col in outcome_cols:
+        df[col] = df[col] / df['margin']
+    return df[outcome_cols]
+
+def get_prevalence(y):
+    """Return the mean of `y` (the share of positives when `y` holds 0/1 labels)."""
+    return np.mean(y)
+
+def get_true_pos(y, pred, th=0.5):
+    """Count samples with pred > th whose label equals 1."""
+    predicted_positive = pred > th
+    return np.sum(predicted_positive & (y == 1))
+
+
+def get_true_neg(y, pred, th=0.5):
+    """Count samples with pred not above th whose label equals 0."""
+    predicted_negative = np.logical_not(pred > th)
+    return np.sum(predicted_negative & (y == 0))
+
+def get_accuracy(y, pred, th=0.5):
+    """Return (TP + TN) / (TP + TN + FP + FN) at cut-off `th`."""
+    counts = {
+        'tp': get_true_pos(y, pred, th=th),
+        'tn': get_true_neg(y, pred, th=th),
+        'fp': get_false_pos(y, pred, th=th),
+        'fn': get_false_neg(y, pred, th=th),
+    }
+    correct = counts['tp'] + counts['tn']
+    return correct / (counts['tp'] + counts['tn'] + counts['fp'] + counts['fn'])
+
+def get_false_neg(y, pred, th=0.5):
+    """Count samples with pred not above th whose label equals 1."""
+    predicted_negative = np.logical_not(pred > th)
+    return np.sum(predicted_negative & (y == 1))
+
+
+def get_false_pos(y, pred, th=0.5):
+    """Count samples with pred > th whose label equals 0."""
+    predicted_positive = pred > th
+    return np.sum(predicted_positive & (y == 0))
+
+def get_sensitivity(y, pred, th=0.5):
+    """Return TP / (TP + FN) at cut-off `th`."""
+    hits = get_true_pos(y, pred, th=th)
+    misses = get_false_neg(y, pred, th=th)
+    return hits / (hits + misses)
+
+def get_specificity(y, pred, th=0.5):
+    """Return TN / (TN + FP) at cut-off `th`."""
+    correct_rejections = get_true_neg(y, pred, th=th)
+    false_alarms = get_false_pos(y, pred, th=th)
+    return correct_rejections / (correct_rejections + false_alarms)
+
+def get_ppv(y, pred, th=0.5):
+    """Return the positive predictive value TP / (TP + FP) at cut-off `th`."""
+    hits = get_true_pos(y, pred, th=th)
+    false_alarms = get_false_pos(y, pred, th=th)
+    return hits / (hits + false_alarms)
+
+def get_npv(y, pred, th=0.5):
+    """Return the negative predictive value TN / (TN + FN) at cut-off `th`."""
+    correct_rejections = get_true_neg(y, pred, th=th)
+    misses = get_false_neg(y, pred, th=th)
+    return correct_rejections / (correct_rejections + misses)
+
+def get_performance_metrics(y, pred, class_labels, tp=get_true_pos,
+        tn=get_true_neg, fp=get_false_pos,
+        fn=get_false_neg,
+        acc=get_accuracy, prevalence=get_prevalence, spec=get_specificity,
+        sens=get_sensitivity, ppv=get_ppv, npv=get_npv, auc=roc_auc_score, f1=f1_score,
+        thresholds=[]):
+    """Build a one-row-per-class table of classification metrics.
+
+    y, pred: 2-D arrays indexed as [:, i] per class (labels and scores).
+    class_labels: class names, one per column of `y` / `pred`.
+    tp ... f1: metric callables; pass None to report "Not Defined" for that column.
+    thresholds: one cut-off per class; if its length does not match
+        `class_labels`, 0.5 is used for every class.
+    Returns a DataFrame with columns Name, TP, TN, FP, FN, Accuracy, Prevalence,
+    Sensitivity, Specificity, PPV, NPV, AUC, F1, Threshold.
+    """
+    # The default list is never mutated, only rebound, so sharing it between calls is harmless.
+    if len(thresholds) != len(class_labels):
+        thresholds = [.5] * len(class_labels)
+
+    def _metric(func, *args):
+        """Evaluate one metric rounded to 3 decimals, or flag it as not requested."""
+        return round(func(*args), 3) if func is not None else "Not Defined"
+
+    res = []
+    for i, label in enumerate(class_labels):
+        y_i, pred_i, th = y[:, i], pred[:, i], thresholds[i]
+        res.append({
+            "Name": label,
+            # The four counts now use the class threshold like every other column;
+            # previously they were always taken at the default 0.5 cut-off.
+            "TP": _metric(tp, y_i, pred_i, th),
+            "TN": _metric(tn, y_i, pred_i, th),
+            "FP": _metric(fp, y_i, pred_i, th),
+            "FN": _metric(fn, y_i, pred_i, th),
+            "Accuracy": _metric(acc, y_i, pred_i, th),
+            "Prevalence": _metric(prevalence, y_i),
+            "Sensitivity": _metric(sens, y_i, pred_i, th),
+            "Specificity": _metric(spec, y_i, pred_i, th),
+            "PPV": _metric(ppv, y_i, pred_i, th),
+            "NPV": _metric(npv, y_i, pred_i, th),
+            # AUC is threshold-free; F1 is computed on the thresholded predictions.
+            "AUC": _metric(auc, y_i, pred_i),
+            "F1": _metric(f1, y_i, pred_i > th),
+            "Threshold": round(th, 3),
+        })
+    return pd.DataFrame(res)
+
+
+def print_confidence_intervals(class_labels, statistics):
+    """Return a one-column DataFrame of 'mean (q05-q95)' strings, one row per class.
+
+    statistics: 2-D array with one row of values per class; mean and quantiles
+    are taken along axis 1. The result is indexed by `class_labels`.
+    """
+    table = pd.DataFrame(columns=["Mean AUC (CI 5%-95%)"])
+    for i, label in enumerate(class_labels):
+        center = statistics.mean(axis=1)[i]
+        lower, upper = np.quantile(statistics, [.05, .95], axis=1)[:, i]
+        table.loc[label] = ["%.2f (%.2f-%.2f)" % (center, lower, upper)]
+    return table
+
+
+def get_curve(gt, pred, target_names, curve='roc'):
+    """Draw one ROC or precision-recall curve per class onto figure 1.
+
+    gt, pred: 2-D arrays indexed as [:, i] per class (labels and scores).
+    target_names: class names used in the legend.
+    curve: 'roc' or 'prc'; any other value draws nothing.
+    """
+    for i, target in enumerate(target_names):
+        if curve == 'roc':
+            area = roc_auc_score(gt[:, i], pred[:, i])
+            legend_text = target + " AUC: %.3f " % area
+            fpr, tpr, _ = roc_curve(gt[:, i], pred[:, i])
+            plt.figure(1, figsize=(7, 7))
+            # Diagonal reference line.
+            plt.plot([0, 1], [0, 1], 'k--')
+            plt.plot(fpr, tpr, label=legend_text)
+            plt.xlabel("False positive rate")
+            plt.ylabel("True positive rate")
+
+            plt.legend(loc='upper center', bbox_to_anchor=(1.3, 1),
+                       fancybox=True, ncol=1)
+        elif curve == 'prc':
+            precision, recall, _ = precision_recall_curve(gt[:, i], pred[:, i])
+            avg_precision = average_precision_score(gt[:, i], pred[:, i])
+            legend_text = target + " Avg.: %.3f " % avg_precision
+            plt.figure(1, figsize=(7, 7))
+            plt.step(recall, precision, where='post', label=legend_text)
+            plt.xlabel('Recall')
+            plt.ylabel('Precision')
+            plt.ylim([0.0, 1.05])
+            plt.xlim([0.0, 1.0])
+            plt.legend(loc='upper center', bbox_to_anchor=(1.3, 1),
+                       fancybox=True, ncol=1)
diff --git a/bf.py b/bf.py
index 89270fc..d108f18 100644
--- a/bf.py
+++ b/bf.py
@@ -149,6 +149,7 @@ def convert_matches(df_matches):
match_changes=[]
odds_changes=[]
for f in tqdm(listdir(in_path)):
+ #print(f)
read_match(int(f),in_path)
#break
out_path='{}/{:%Y-%b-%d}'.format(OUT_PATH, d)
diff --git a/bind.ipynb b/bind.ipynb
index 6a1f7de..c0638d1 100644
--- a/bind.ipynb
+++ b/bind.ipynb
@@ -15,10 +15,10 @@
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
- "display_name": "Python 3.8.5 64-bit ('dmenv': conda)",
+ "display_name": "Python 3.8.5 64-bit",
"metadata": {
"interpreter": {
- "hash": "7443be6333979a5671edb97a6208c12f43c7c42bc49d43d9a0706d3198065d4b"
+ "hash": "12f2fd9a8da6c9ddda222d67ff20ee53b82617d5a9ac88eb47f60b586ce1b05e"
}
}
}
@@ -28,7 +28,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -37,45 +37,136 @@
"import time\n",
"import re\n",
"import pandas as pd\n",
+ "from IPython.display import display\n",
"import numpy as np\n",
"import random\n",
"import pytz\n",
- "from tqdm import tqdm\n",
+ "#from tqdm import tqdm\n",
"import bz2\n",
"import json\n",
"import glob\n",
- "local_tz = 'UTC'"
+ "local_tz = 'UTC'\n",
+ "from api.data_collector import DataCollector"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Empty DataFrame\nColumns: [team]\nIndex: []\n"
+ ]
+ }
+ ],
+ "source": [
+ "dp=DataCollector()\n",
+ "df_sofa=dp._provide_sofa()\n",
+ "df_op=dp._provide_op()\n",
+ "df_elo=dp._provide_elo()\n",
+ "df_op_=df_op[df_op['ds']>=df_sofa.ds.min()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "chars0=['ó','é','í','ş','ã','İ','ğ','ç','ü','É','â','Ç','õ','ł','ą','Ś','ø','ń','ț','å','Å','ß', 'æ', 'Ž','ş', 'ə','Ö','ı','á','î','ñ','ö','ź','ú','è','Ł','ę','Ş','ä','ë','ô','ș','ū','č','Š','Þ','ė','Ä','ă','ì','š','i','ć','ň','ž','ư','ơ','ê','à','ð','ő','Ü','ý','ď','Á','ř','Č','Ú']\n",
- "chars1=['o','e','i','s','a','I','g','c','u','E','a','C','o','l','a','s','o','n','t','a','A','ss','ae','Z','sh','a','O','i','a','i','n','o','z','u','e','L','e','S','a','e','o','s','u','c','S','P','e','A','a','i','s','i','c','n','z','u','o','e','a','d','o','U','y','d','A','r','C','U']\n",
- "dicUnicode2En=dict(zip(chars0, chars1))"
+ "df_sofa_binded, df_sofa_ = bind_full(df_sofa,df_op_)\n",
+ "df_sofa_binded=bind_iteration('FIRST',df_sofa_binded,df_sofa_, df_op_)\n",
+ "df_sofa_binded=bind_iteration('SECOND',df_sofa_binded,df_sofa_, df_op_)\n",
+ "df_sofa_binded=bind_iteration('THIRD',df_sofa_binded,df_sofa_, df_op_)"
]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_teams=pd.read_csv('data/teams.csv', index_col=None).sort_values(by='op_t')\n",
+ "mask = df_teams.duplicated(subset=['country','op_t'], keep=False)\n",
+ "display(df_teams[mask])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_elo.team=df_elo.team.str.lower()\n",
+ "df_elo=df_elo.rename(columns={'country':'code'})\n",
+ "df_elo_teams=df_elo[['team','code','id']].drop_duplicates().sort_values(by='team')\n",
+ "df_countries=pd.read_csv('data/elo/countries.csv', index_col=None)\n",
+ "df_elo_teams=df_elo_teams.merge(df_countries, on='code', how='left')\n",
+ "df_elo_teams.to_csv('data/elo/elo_teams.csv', index=False)\n",
+ "df_elo_teams['first']=df_elo_teams['team'].apply(lambda x: x.split(' ')[0])\n",
+ "df_elo_teams['last']=df_elo_teams['team'].apply(lambda x: x.split(' ')[-1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_teams=pd.read_csv('data/teams.csv', index_col=None)\n",
+ "df_teams"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_elo_merged=df_elo.merge(df_teams[['id','tid']], on='id', how='left').drop_duplicates()\n",
+ "df_elo_merged=df_elo_merged.dropna()\n",
+ "df_elo_merged"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "source": [
+ "# Load data\n",
+ "## SofaScore"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "df_countries=pd.read_csv('data/countries.csv', index_col=None)\n",
+ "df_countries=pd.read_csv('data/sofa/countries.csv', index_col=None)\n",
"df_countries['Name']=df_countries['Name'].str.lower()\n",
"df_countries.columns=['country','countryCode']\n",
"\n",
- "df_bf=pd.read_csv('data/bf/bf_matches.csv', index_col=None)\n",
- "df_bf=df_bf[~df_bf['halfTime'].isna()]\n",
- "df_bf=df_bf[df_bf['halfTime']!='0']\n",
- "df_bf['inplayTime']=pd.to_datetime(df_bf['inplayTime'])\n",
- "df_bf['home_name_low']=df_bf['home_name'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
- "df_bf['away_name_low']=df_bf['away_name'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "chars0=['ó','é','í','ş','ã','İ','ğ','ç','ü','É','â','Ç','õ','ł','ą','Ś','ø','ń','ț','å','Å','ß', 'æ', 'Ž','ş', 'ə','Ö','ı','á','î','ñ','ö','ź','ú','è','Ł','ę','Ş','ä','ë','ô','ș','ū','č','Š','Þ','ė','Ä','ă','ì','š','i','ć','ň','ž','ư','ơ','ê','à','ð','ő','Ü','ý','ď','Á','ř','Č','Ú']\n",
+ "chars1=['o','e','i','s','a','I','g','c','u','E','a','C','o','l','a','s','o','n','t','a','A','ss','ae','Z','sh','a','O','i','a','i','n','o','z','u','e','L','e','S','a','e','o','s','u','c','S','P','e','A','a','i','s','i','c','n','z','u','o','e','a','d','o','U','y','d','A','r','C','U']\n",
+ "dicUnicode2En=dict(zip(chars0, chars1))\n",
"\n",
- "df_ss=pd.read_csv('data/matches_done.csv', index_col=None)\n",
+ "df_ss=pd.read_csv('data/sofa/matches_done.csv', index_col=None)\n",
"df_ss['ts']=pd.to_datetime(df_ss['ts'])\n",
"df_ss=df_ss.merge(df_countries, on='country', how='left')\n",
"df_ss.loc[df_ss['country']=='england','countryCode']='GB'\n",
@@ -87,470 +178,292 @@
"df_ss.loc[df_ss['awayTeamShort']=='???','awayTeamShort']='AEK Athens'\n",
"df_ss['homeTeamShortLow']=df_ss['homeTeamShort'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
"df_ss['awayTeamShortLow']=df_ss['awayTeamShort'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "countries_of_interest=sorted(list(df_ss.country.unique()))\n",
+ "countries_of_interest+=['']"
+ ]
+ },
+ {
+ "source": [
+ "## Betfair"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_bf=pd.read_csv('data/bf/bf_matches.csv', index_col=None)\n",
+ "df_bf=df_bf[~df_bf['halfTime'].isna()]\n",
+ "df_bf=df_bf[df_bf['halfTime']!='0']\n",
+ "df_bf['inplayTime']=pd.to_datetime(df_bf['inplayTime'])\n",
+ "df_bf['home_name_low']=df_bf['home_name'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "df_bf['away_name_low']=df_bf['away_name'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
"\n",
- "#df_ss1=pd.read_csv('data/matches2.csv', index_col=None)\n",
- "#pd.concat([df_ss,df_ss1], axis=0).to_csv('data/matches_done2.csv', index=False)"
+ "df_countries=pd.read_csv('data/sofa/countries.csv', index_col=None)\n",
+ "df_countries['Name']=df_countries['Name'].str.lower()\n",
+ "df_countries.columns=['country','countryCode']\n",
+ "df_bf=df_bf.merge(df_countries, on='countryCode', how='left')\n",
+ "\n",
+ "df_bf=df_bf[df_bf['countryCode']!='CS']\n",
+ "df_bf.loc[df_bf['countryCode'].isna(),'country']=''\n",
+ "countries_replacement={'united kingdom':'england', 'russian federation':'russia','united states':'usa','czech republic':'czech-republic','korea, republic of':'south-korea' }\n",
+ "df_bf['country']=df_bf['country'].replace(countries_replacement)\n",
+ "df_bf=df_bf.loc[df_bf['country'].isin(countries_of_interest)]"
]
},
+ {
+ "source": [
+ "## Fbref"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
+ "def get_country(x):\n",
+ " if x==0:\n",
+ " return ''\n",
+ " res=df_countries.loc[df_countries['comps'].str.contains(str(int(x))),'name']\n",
+ " if len(res.index)>0:\n",
+ " return res.values[0]\n",
+ " else:\n",
+ " return ''\n",
+ " \n",
+ "df_countries=pd.read_csv('data/fbref/countries.csv', index_col=None)\n",
+ "df_countries['name']=df_countries['name'].str.lower()\n",
+ "#df_countries.columns=['country','countryCode']\n",
+ "\n",
+ "df_fbref=pd.read_csv('data/fbref/matches_full.csv', index_col=None)\n",
+ "df_fbref=df_fbref[~df_fbref['ds_venue'].isna()]\n",
+ "df_fbref['ts']=df_fbref['ds_venue'].apply(lambda x: datetime.utcfromtimestamp(x))\n",
+ "\n",
+ "df_fbref['home_name_low']=df_fbref['team1'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "df_fbref['away_name_low']=df_fbref['team2'].replace(dicUnicode2En, regex=True).replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "df_fbref['country_id']=df_fbref['country_id'].fillna(0)\n",
+ "df_fbref['country']=df_fbref['country_id'].apply(lambda x: get_country(x))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.DataFrame([df_fbref.isna().sum(),df_fbref.isna().sum()/1230 ]).T"
+ ]
+ },
+ {
+ "source": [
+ "## OP"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " country name cnt clear\n",
+ "46 chile Colo Colo (Chi) Chi Colo Colo\n",
+ "80 austria Rapid Vienna (Aut) Aut Rapid Vienna\n",
+ "287 france Bordeaux W (Fra) Fra Bordeaux W\n",
+ "361 spain Melilla (Esp) Esp Melilla\n",
+ "401 spain Numancia (Esp) Esp Numancia\n",
+ "... ... ... ... ...\n",
+ "143800 spain RSD Alcala RSD Alcala\n",
+ "144125 world Hienghene Hienghene\n",
+ "144223 spain Chinato Chinato\n",
+ "145021 world Bucaspor Bucaspor\n",
+ "145205 europe Potsdam W Potsdam W\n",
+ "\n",
+ "[6622 rows x 4 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n country \n name \n cnt \n clear \n \n \n \n \n 46 \n chile \n Colo Colo (Chi) \n Chi \n Colo Colo \n \n \n 80 \n austria \n Rapid Vienna (Aut) \n Aut \n Rapid Vienna \n \n \n 287 \n france \n Bordeaux W (Fra) \n Fra \n Bordeaux W \n \n \n 361 \n spain \n Melilla (Esp) \n Esp \n Melilla \n \n \n 401 \n spain \n Numancia (Esp) \n Esp \n Numancia \n \n \n ... \n ... \n ... \n ... \n ... \n \n \n 143800 \n spain \n RSD Alcala \n \n RSD Alcala \n \n \n 144125 \n world \n Hienghene \n \n Hienghene \n \n \n 144223 \n spain \n Chinato \n \n Chinato \n \n \n 145021 \n world \n Bucaspor \n \n Bucaspor \n \n \n 145205 \n europe \n Potsdam W \n \n Potsdam W \n \n \n
\n
6622 rows × 4 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 68
+ }
+ ],
+ "source": [
+ "df_op=pd.read_csv('data/op/matches1.csv', index_col=None)\n",
+ "df_op['home_name_low']=df_op['t1'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "df_op['away_name_low']=df_op['t2'].replace('[^a-zA-Z0-9 ]', '', regex=True).str.lower()\n",
+ "df_op['ts']=pd.to_datetime(df_op['ds'], format='%y/%m/%d %H:%M')\n",
+ "\n",
+ "t1=df_op[['country','t1']].rename(columns={'t1': 'name'})\n",
+ "t2=df_op[['country','t2']].rename(columns={'t2': 'name'})\n",
+ "\n",
+ "df_teams=pd.DataFrame(pd.concat([t1,t2], axis=0)).drop_duplicates()\n",
+ "\n",
+ "df_countries=pd.read_csv('data/op/countries.csv', index_col=None)\n",
+ "countries=dict(zip(df_countries.abbr, df_countries.name.str.lower()))\n",
+ "\n",
+ "df_teams['cnt']=df_teams['name'].apply(lambda x: x.split('(')[1].replace(')','').strip() if '(' in x else '')\n",
+ "df_teams1=df_teams.loc[df_teams['cnt'].str.len()>1]\n",
+ "df_teams2=df_teams.loc[df_teams['cnt'].str.len()<1]\n",
+ "df_teams1['country']=df_teams1.cnt.apply(lambda x: countries[x] if x in countries else 'other')\n",
+ "df_teams=pd.concat([df_teams1,df_teams2],axis=0)\n",
+ "df_teams['clear']=df_teams.name.apply(lambda x: x.split('(')[0].strip())\n",
+ "df_teams"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " clear tid\n",
+ "0 Boreham Wood 0000\n",
+ "1 Dortmund 0001\n",
+ "2 Barcelona SC 0002\n",
+ "3 Cambridge Utd 0003\n",
+ "4 Esteghlal F.C. 0004\n",
+ "... ... ...\n",
+ "4546 Coleraine 4546\n",
+ "4547 Conquense 4547\n",
+ "4548 Loures 4548\n",
+ "4549 Qandi 4549\n",
+ "4550 Chanmari 4550\n",
+ "\n",
+ "[4551 rows x 2 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n clear \n tid \n \n \n \n \n 0 \n Boreham Wood \n 0000 \n \n \n 1 \n Dortmund \n 0001 \n \n \n 2 \n Barcelona SC \n 0002 \n \n \n 3 \n Cambridge Utd \n 0003 \n \n \n 4 \n Esteghlal F.C. \n 0004 \n \n \n ... \n ... \n ... \n \n \n 4546 \n Coleraine \n 4546 \n \n \n 4547 \n Conquense \n 4547 \n \n \n 4548 \n Loures \n 4548 \n \n \n 4549 \n Qandi \n 4549 \n \n \n 4550 \n Chanmari \n 4550 \n \n \n
\n
4551 rows × 2 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ],
+ "source": [
+ "df_clear=pd.DataFrame(df_teams.clear.unique(), columns=['clear'])\n",
+ "df_clear['tid'] = df_clear.index\n",
+ "df_clear['tid'] = df_clear.tid.apply(lambda x: '{:04.0f}'.format(x))\n",
+ "df_teams=df_teams.merge(df_clear, on=['clear'], how='left')\n",
+ "df_teams"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_teams=df_teams.merge(df_clear, on=['clear'], how='left')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " name clear\n",
+ "tid \n",
+ "0165 3 3\n",
+ "0619 3 3\n",
+ "0798 3 3\n",
+ "0846 3 3\n",
+ "0909 3 3"
+ ],
+ "text/html": "
\n\n
\n \n \n \n name \n clear \n \n \n tid \n \n \n \n \n \n \n 0165 \n 3 \n 3 \n \n \n 0619 \n 3 \n 3 \n \n \n 0798 \n 3 \n 3 \n \n \n 0846 \n 3 \n 3 \n \n \n 0909 \n 3 \n 3 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
+ ],
+ "source": [
+ "gr=df_teams.groupby(['tid']).count()\n",
+ "gr.loc[gr['clear']>2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " name clear tid\n",
+ "622 River Plate River Plate 0619\n",
+ "2453 River Plate (Arg) River Plate 0619\n",
+ "3995 River Plate (Uru) River Plate 0619"
+ ],
+ "text/html": "
\n\n
\n \n \n \n name \n clear \n tid \n \n \n \n \n 622 \n River Plate \n River Plate \n 0619 \n \n \n 2453 \n River Plate (Arg) \n River Plate \n 0619 \n \n \n 3995 \n River Plate (Uru) \n River Plate \n 0619 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 31
+ }
+ ],
+ "source": [
+ "df_teams[df_teams.tid=='0619']"
+ ]
+ },
+ {
+ "source": [
+ "# Binding\n",
+ "## SS - BF"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cc_to_empty=['africa','asia','europe','north-central-america','south-america','world']\n",
"def slice_df(df, cc, str1, str2, type='00'):\n",
+ " if cc in cc_to_empty:\n",
+ " cc=''\n",
" if type=='00':\n",
- " return df.loc[(df['countryCode']==cc) & (df['home_name_low']==str1) & (df['away_name_low']==str2)]\n",
+ " return df.loc[(df['country']==cc) & (df['home_name_low']==str1) & (df['away_name_low']==str2)]\n",
" elif type=='10':\n",
- " return df.loc[(df['countryCode']==cc) & (df['home_name_low'].str.contains(str1)) & (df['away_name_low']==str2)]\n",
+ " return df.loc[(df['country']==cc) & (df['home_name_low'].str.contains(str1)) & (df['away_name_low']==str2)]\n",
" elif type=='01':\n",
- " return df.loc[(df['countryCode']==cc) & (df['home_name_low']==str1) & (df['away_name_low'].str.contains(str2))]\n",
+ " return df.loc[(df['country']==cc) & (df['home_name_low']==str1) & (df['away_name_low'].str.contains(str2))]\n",
" else:\n",
- " return df.loc[(df['countryCode']==cc) & (df['home_name_low'].str.contains(str1)) & (df['away_name_low'].str.contains(str2))]"
+ " return df.loc[(df['country']==cc) & (df['home_name_low'].str.contains(str1)) & (df['away_name_low'].str.contains(str2))]"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "/Mirandes - Ponferradina/Ponferradina\n",
- "found: Reggina /Reggina - Pescara/Pescara\n",
- "found: SPAL/Spal - Cosenza/Cosenza\n",
- "found: Venezia/Venezia - Frosinone/Frosinone\n",
- "found: Entella/Entella - Reggiana/Reggiana\n",
- "found: Vicenza/LR Vicenza Virtus - Pordenone/Pordenone\n",
- "found: Auxerre/Auxerre - AC Ajaccio/AC Ajaccio\n",
- "found: Clermont/Clermont - Rodez/Rodez\n",
- "found: Dunkerque/Dunkerque - Guingamp/Guingamp\n",
- "found: Admira/Admira Wacker - Ried/SV Ried\n",
- "found: La Serena/La Serena - Unión Española/Union Espanola\n",
- "found: Sarpsborg 08/Sarpsborg - Stabæk/Stabaek\n",
- "found: Montana/Montana - Slavia/Slavia Sofia\n",
- "found: Šibenik/Sibenik - Osijek/Osijek\n",
- "found: Gorica/HNK Gorica - Hajduk/Hajduk Split\n",
- "found: Opava/SFC Opava - Teplice/Teplice\n",
- "found: Cova da Piedade/Cova da Piedade - Arouca/Arouca\n",
- "found: Domžale/Domzale - Gorica/Gorica\n",
- "found: Inhulets/Inhulets Petrove - FC Minaj/FC Minaj\n",
- "2020-10-04 00:00:00\n",
- "found: Southampton/Southampton - West Brom/West Brom\n",
- "found: Man Utd/Man Utd - Tottenham/Tottenham\n",
- "found: Osasuna/Osasuna - Celta/Celta Vigo\n",
- "found: Bayern M./Bayern Munich - Hertha/Hertha Berlin\n",
- "found: Parma/Parma - Verona/Verona\n",
- "found: Milan/AC Milan - Spezia/Spezia\n",
- "found: Metz/Metz - Lorient/Lorient\n",
- "found: Groningen/FC Groningen - Ajax/Ajax\n",
- "found: Sparta/Sparta Rotterdam - AZ/Az Alkmaar\n",
- "found: Famalicão/Famalicao - Rio Ave/Rio Ave\n",
- "found: Benfica/Benfica - Farense/Farense\n",
- "found: Rubin Kazan/Rubin Kazan - Akhmat/Akhmat Grozny\n",
- "found: Denizlispor/Denizlispor - Konyaspor/Konyaspor\n",
- "found: Alanyaspor/Alanyaspor - Hatayspor/Hatayspor\n",
- "found: Botafogo/Botafogo - Fluminense/Fluminense\n",
- "found: Toluca/Toluca - Cruz Azul/Cruz Azul\n",
- "found: Minnesota Utd/Minnesota Utd - FC Cincinnati/FC Cincinnati\n",
- "found: Dallas/FC Dallas - Columbus/Columbus\n",
- "found: San Jose/San Jose Earthquakes - LA Galaxy/LA Galaxy\n",
- "found: Castellón/CD Castellon - Leganés/Leganes\n",
- "found: Sturm/Sturm Graz - SCR Altach/SCR Altach\n",
- "found: U. Católica/Univ Catolica (Chile) - Uni. de Chile/Universidad de Chile\n",
- "found: Lyngby BK/Lyngby - SønderjyskE/SonderjyskE\n",
- "found: OB/OB - Vejle/Vejle\n",
- "found: AaB/AaB - AGF/AGF\n",
- "found: Horsens/AC Horsens - Midtjylland/Midtjylland\n",
- "found: Hammarby/Hammarby - Djurgården/Djurgardens\n",
- "found: Mjällby/Mjallby - Helsingborg/Helsingborgs\n",
- "found: Östersund/Ostersunds FK - AIK/AIK\n",
- "found: Basel/FC Basel - Luzern/Luzern\n",
- "found: Balıkesirspor/Balikesirspor - Menemen/Menemen Belediyespor\n",
- "found: Adana DS/Adana Demirspor - Adanaspor/Adanaspor\n",
- "found: Beroe/Beroe Stara Za - Lokomotiv/Lokomotiv Plovdiv\n",
- "found: Etar/Etar - CSKA Sofia/CSKA Sofia\n",
- "found: Varaždin/Varazdin - Dinamo/Dinamo Zagreb\n",
- "found: Rijeka/Rijeka - Slaven/Slaven Belupo\n",
- "found: FC Slovan Liberec/Slovan Liberec - Příbram/Pribram\n",
- "found: Sigma Olomouc/Sigma Olomouc - Viktoria Plzeň/Plzen\n",
- "found: Saarbrücken/Saarbrucken - Hallescher/Hallescher FC\n",
- "found: Panathinaikos/Panathinaikos - Aris/Aris\n",
- "found: PAOK/PAOK - OFI/OFI\n",
- "found: Kongsvinger/Kongsvinger - KFUM Oslo/KFUM Oslo\n",
- "found: Benfica B/Benfica B - Estoril/Estoril Praia\n",
- "found: Botoșani/Botosani - Chindia T./Chindia Targoviste\n",
- "found: Gaz Metan/Gaz Metan Medias - Hermannstadt/Hermannstadt\n",
- "found: St. Johnstone/St Johnstone - Celtic/Celtic\n",
- "found: Pohronie/Pohronie - Slovan/Slovan Bratislava\n",
- "found: Trenčín/Trencin - Senica/FK Senica\n",
- "found: Celje/NK Celje - Tabor Sežana/Tabor Sezana\n",
- "found: Olimpik/Olimpik Donetsk - Oleksandria/Oleksandria\n",
- "found: Dynamo Kyiv/Dynamo Kiev - Zorya Luhansk/Zorya\n",
- "2020-10-05 00:00:00\n",
- "found: Querétaro/Queretaro - Monterrey/Monterrey\n",
- "found: Tijuana/Tijuana - Guadalajara/Guadalajara\n",
- "found: Salt Lake/Real Salt Lake - Los Angeles/Los Angeles FC\n",
- "found: Wehen/Wehen Wiesbaden - Kaiserslautern/Kaiserslautern\n",
- "found: Lamia/Lamia - Volos/NFC Volos\n",
- "found: UTA Arad/UTA Arad - Academica C./Academica Clinceni\n",
- "2020-10-06 00:00:00\n",
- "2020-10-07 00:00:00\n",
- "found: Nashville SC/Nashville SC - Minnesota Utd/Minnesota Utd\n",
- "found: HamKam/Ham-Kam - Jerv/Jerv\n",
- "found: Rentistas/CA Rentistas - Defensor/Defensor Sporting\n",
- "2020-10-08 00:00:00\n",
- "found: Botafogo/Botafogo - Palmeiras/SE Palmeiras\n",
- "found: Houston/Houston Dynamo - Dallas/FC Dallas\n",
- "found: Kansas City/Kansas City - Chicago Fire/Chicago Fire\n",
- "found: LA Galaxy/LA Galaxy - Portland/Portland Timbers\n",
- "found: O'Higgins/OHiggins - Cobresal/Cobresal\n",
- "found: Académico Viseu/Academico de Viseu - Académica/Academica\n",
- "2020-10-09 00:00:00\n",
- "found: RB Bragantino/Bragantino SP - Internacional/Internacional\n",
- "2020-10-10 00:00:00\n",
- "found: Logroñés/UD Logrones - Almería/Almeria\n",
- "found: Iquique/Deportes Iquique - Audax/Audax Italiano\n",
- "found: Dresden/Dynamo Dresden - Magdeburg/FC Magdeburg\n",
- "found: Uerdingen/Uerdingen - Bayern M. II/Bayern Munich II\n",
- "found: Verl/Verl - Hansa Rostock/Hansa Rostock\n",
- "found: Jerv/Jerv - Raufoss/Raufoss\n",
- "2020-10-11 00:00:00\n",
- "found: Leganés/Leganes - Girona/Girona\n",
- "found: Zaragoza/Zaragoza - Albacete/Albacete\n",
- "found: Ponferradina/Ponferradina - Cartagena/FC Cartagena\n",
- "found: Sogndal/Sogndal - Tromsø/Tromso\n",
- "found: Strommen/Strommen - Kongsvinger/Kongsvinger\n",
- "found: Åsane/Asane - Stjørdals/Blink/Stjordals-Blink\n",
- "found: Liverpool/Liverpool Montevideo - Boston River/Boston River\n",
- "found: Atl. Mineiro/Atletico MG - Goiás/Goias\n",
- "found: Vancouver/Vancouver Whitecaps - Salt Lake/Real Salt Lake\n",
- "found: Lugo/Lugo - Mallorca/Mallorca\n",
- "found: Fuenlabrada/Fuenlabrada - Castellón/CD Castellon\n",
- "found: Unterhaching/Unterhaching - Meppen/SV Meppen\n",
- "2020-10-12 00:00:00\n",
- "found: Santos/Santos Laguna - Tijuana/Tijuana\n",
- "2020-10-13 00:00:00\n",
- "2020-10-14 00:00:00\n",
- "found: La Serena/La Serena - Palestino/Palestino\n",
- "2020-10-15 00:00:00\n",
- "found: RoPS/RoPS - Lahti/Lahti\n",
- "found: Atl. Mineiro/Atletico MG - Fluminense/Fluminense\n",
- "found: Houston/Houston Dynamo - Nashville SC/Nashville SC\n",
- "found: Dallas/FC Dallas - Kansas City/Kansas City\n",
- "found: LA Galaxy/LA Galaxy - San Jose/San Jose Earthquakes\n",
- "found: Emelec/Emelec - Orense/Orense Sporting Club\n",
- "2020-10-16 00:00:00\n",
- "found: Dijon/Dijon - Rennes/Rennes\n",
- "found: Atlético de San Luis/San Luis - Querétaro/Queretaro\n",
- "found: Chambly/Chambly Oise - Clermont/Clermont\n",
- "found: Hebei/Hebei CFFC - Guangzhou/Guangzhou FC\n",
- "found: Tianjin/Tianjin Teda - Shenzhen/Shenzhen FC\n",
- "found: Slaven/Slaven Belupo - Istra/NK Istra\n",
- "found: Haka/Haka - TPS/TPS\n",
- "found: GAIS/GAIS - Norrby/Norrby IF\n",
- "2020-10-17 00:00:00\n",
- "found: Man City/Man City - Arsenal/Arsenal\n",
- "found: Real Madrid/Real Madrid - Cádiz/Cadiz\n",
- "found: Mainz 05/Mainz - Leverkusen/Leverkusen\n",
- "found: Freiburg/Freiburg - Bremen/Werder Bremen\n",
- "found: Bielefeld/Arminia Bielefeld - Bayern M./Bayern Munich\n",
- "found: Napoli/Napoli - Atalanta/Atalanta\n",
- "found: Inter/Inter - Milan/AC Milan\n",
- "found: Heracles/Heracles - Waalwijk/RKC Waalwijk\n",
- "found: Gil Vicente/Gil Vicente - Tondela/Tondela\n",
- "found: Marítimo/Maritimo - Portimonense/Portimonense\n",
- "found: Braga/Braga - Nacional/CD Nacional Funchal\n",
- "found: Krasnodar/FK Krasnodar - Rubin Kazan/Rubin Kazan\n",
- "found: Konyaspor/Konyaspor - Malatyaspor/Malatyaspor\n",
- "found: Barnsley/Barnsley - Bristol City/Bristol City\n",
- "found: Blackburn/Blackburn - Forest/Nottm Forest\n",
- "found: Brentford/Brentford - Coventry/Coventry\n",
- "found: Luton/Luton - Stoke/Stoke\n",
- "found: Middlesbrough/Middlesbrough - Reading/Reading\n",
- "found: Rotherham/Rotherham - Norwich/Norwich\n",
- "found: Swansea/Swansea - Huddersfield/Huddersfield\n",
- "found: Wycombe/Wycombe - Millwall/Millwall\n",
- "found: Cosenza/Cosenza - Cittadella/Cittadella\n",
- "found: Cremonese/US Cremonese - Venezia/Venezia\n",
- "found: Pordenone/Pordenone - SPAL/Spal\n",
- "found: Reggiana/Reggiana - Chievo/Chievo\n",
- "found: Salernitana/Salernitana - Pisa/Pisa\n",
- "found: Frosinone/Frosinone - Ascoli/Ascoli\n",
- "found: Valenciennes/Valenciennes - Sochaux/Sochaux\n",
- "found: AC Ajaccio/AC Ajaccio - Toulouse/Toulouse\n",
- "found: Paris FC/Paris FC - Pau/Pau\n",
- "found: Rodez/Rodez - Troyes/ESTAC Troyes\n",
- "found: Midtjylland/Midtjylland - OB/OB\n",
- "found: Mjøndalen/Mjondalen - Brann/Brann\n",
- "found: Falkenberg/Falkenbergs - Örebro/Orebro\n",
- "found: Vaduz/FC Vaduz - Lugano/Lugano\n",
- "found: Servette/Servette - Young Boys/Young Boys\n",
- "found: Adanaspor/Adanaspor - Altınordu/Altinordu\n",
- "found: Altay/Altay - Adana DS/Adana Demirspor\n",
- "found: Cercle Brugge/Cercle Brugge - Gent/Gent\n",
- "found: KV Mechelen/Yellow-Red Mechelen - Kortrijk/Kortrijk\n",
- "found: Ludogorets/Ludogorets - Tsarsko Selo/Tsarsko Selo\n",
- "found: Rijeka/Rijeka - Varaždin/Varazdin\n",
- "found: Cracovia/Cracovia Krakow - Piast/Piast Gliwice\n",
- "found: Górnik/Gornik Zabrze - Raków/Rakow Czestochowa\n",
- "found: CD Mafra/Mafra - Vizela/Vizela\n",
- "found: Zlaté Moravce/Zlate Moravce - Nitra/FC Nitra\n",
- "found: Öster/Osters - Degerfors/Degerfors\n",
- "found: Ljungskile/Ljungskile - Västerås/Vasteras SK\n",
- "found: Örgryte/Orgryte - Dalkurd/Dalkurd FF\n",
- "found: Zorya Luhansk/Zorya - Kolos Kovalivka/Kolos Kovalyovka\n",
- "found: Necaxa/Necaxa - Tijuana/Tijuana\n",
- "found: Mazatlan /Mazatlan FC - Juárez/FC Juarez\n",
- "found: Karlsruhe/Karlsruhe - Sandhausen/SV Sandhausen\n",
- "found: Shandong/Shandong Luneng - Beijing/Beijing Guoan\n",
- "found: Dalian/Dalian Yifang - Shijiazhuang/Shijiazhuang Yongchang FC\n",
- "2020-10-18 00:00:00\n",
- "found: Crystal Palace/Crystal Palace - Brighton/Brighton\n",
- "found: Athletic/Athletic Bilbao - Levante/Levante\n",
- "found: Villarreal/Villarreal - Valencia/Valencia\n",
- "found: Alavés/Alaves - Elche/Elche\n",
- "found: Schalke 04/Schalke 04 - Union Berlin/Union Berlin\n",
- "found: Spezia/Spezia - Fiorentina/Fiorentina\n",
- "found: Torino/Torino - Cagliari/Cagliari\n",
- "found: Udinese/Udinese - Parma/Parma\n",
- "found: AS Monaco/Monaco - Montpellier/Montpellier\n",
- "found: Angers/Angers - Metz/Metz\n",
- "found: ADO/ADO Den Haag - Vitesse/Vitesse Arnhem\n",
- "found: Ajax/Ajax - Heerenveen/Heerenveen\n",
- "found: Groningen/FC Groningen - Utrecht/FC Utrecht\n",
- "found: Zwolle/PEC Zwolle - PSV/PSV\n",
- "found: Paços de Ferreira/Pacos Ferreira - Santa Clara/Santa Clara\n",
- "found: Rostov/Rostov - Akhmat/Akhmat Grozny\n",
- "found: Antalyaspor/Antalyaspor - Gaziantep/Gaziantep FK\n",
- "found: Göztepe/Goztepe - Fenerbahçe/Fenerbahce\n",
- "found: Pumas UNAM/Pumas UNAM - Toluca/Toluca\n",
- "found: Rayo Vallecano/Rayo Vallecano - Espanyol/Espanyol\n",
- "found: Mirandés/Mirandes - Mallorca/Mallorca\n",
- "found: Zaragoza/Zaragoza - Málaga/Malaga\n",
- "found: AGF/AGF - Horsens/AC Horsens\n",
- "found: Haugesund/Haugesund - Sarpsborg 08/Sarpsborg\n",
- "found: Strømsgodset/Stromsgodset - Start/Start\n",
- "found: Elfsborg/Elfsborg - Kalmar/Kalmar FF\n",
- "found: Sirius/Sirius - Östersund/Ostersunds FK\n",
- "found: AIK/AIK - Göteborg/IFK Goteborg\n",
- "found: Bursaspor/Bursaspor - Balıkesirspor/Balikesirspor\n",
- "found: Genk/Genk - Charleroi/Charleroi\n",
- "found: HJK/HJK Helsinki - HIFK/HIFK\n",
- "found: Mariehamn/IFK Mariehamn - SJK/SJK\n",
- "found: KuPS/KuPS - Honka/Honka\n",
- "found: Stal Mielec/Stal Mielec - Wisła K./Wisla Krakow\n",
- "found: Legia/Legia Warsaw - Zagłębie L./Zaglebie Lubin\n",
- "found: Oleksandria/Oleksandria - Inhulets/Inhulets Petrove\n",
- "found: Rotor/Rotor Volgograd - Tambov/FK Tambov\n",
- "found: Guadalajara/Guadalajara - Atlas/Atlas\n",
- "found: Preston/Preston - Cardiff/Cardiff\n",
- "found: Fortuna/Fortuna Dusseldorf - Regensburg/Jahn Regensburg\n",
- "found: Paderborn/Paderborn - Hannover/Hannover\n",
- "found: Ankaraspor/Ankaraspor - Giresunspor/Giresunspor\n",
- "found: Henan/Henan - Wuhan/Wuhan Zall\n",
- "found: Barcelona SC/Barcelona (Ecu) - Delfín/Delfin\n",
- "2020-10-19 00:00:00\n",
- "found: Galatasaray/Galatasaray - Alanyaspor/Alanyaspor\n",
- "found: Gençlerbirliği/Genclerbirligi - Denizlispor/Denizlispor\n",
- "found: Santos/Santos Laguna - Pachuca/Pachuca\n",
- "found: Minnesota Utd/Minnesota Utd - Houston/Houston Dynamo\n",
- "found: Portland/Portland Timbers - Los Angeles/Los Angeles FC\n",
- "found: LA Galaxy/LA Galaxy - Vancouver/Vancouver Whitecaps\n",
- "found: Djurgården/Djurgardens - Malmö/Malmo FF\n",
- "found: Chongqing/Chongqing Lifan - Jiangsu/Jiangsu Suning\n",
- "found: Huanghai/Qingdao Huanghai FC - R&F/Guangzhou R&F\n",
- "found: Lahti/Lahti - Haka/Haka\n",
- "2020-10-20 00:00:00\n",
- "found: León/Leon - América/CF America\n",
- "found: Keçiörengücü/Keciorengucu - Bandırmaspor/Bandirmaspor\n",
- "found: U. La Calera/Union La Calera - Huachipato/Huachipato\n",
- "found: Chaves/Chaves - Oliveirense/Oliveirense\n",
- "2020-10-21 00:00:00\n",
- "found: Adana DS/Adana Demirspor - Ümraniyespor/Umraniyespor\n",
- "found: Guangzhou/Guangzhou FC - Hebei/Hebei CFFC\n",
- "found: Shenzhen/Shenzhen FC - Tianjin/Tianjin Teda\n",
- "found: Waalwijk/RKC Waalwijk - Zwolle/PEC Zwolle\n",
- "found: Ponferradina/Ponferradina - Tenerife/Tenerife\n",
- "found: Lecce/Lecce - Cremonese/US Cremonese\n",
- "found: Coquimbo Unido/Coquimbo Unido - U. de Concepción/Univ de Concepcion\n",
- "found: Balıkesirspor/Balikesirspor - Boluspor/Boluspor\n",
- "2020-10-22 00:00:00\n",
- "found: Honka/Honka - Lahti/Lahti\n",
- "found: KuPS/KuPS - Haka/Haka\n",
- "found: SJK/SJK - Ilves/Ilves\n",
- "found: Tuzlaspor/Tuzlaspor - Ankaraspor/Ankaraspor\n",
- "found: Beijing/Beijing Guoan - Shandong/Shandong Luneng\n",
- "found: Shijiazhuang/Shijiazhuang Yongchang FC - Dalian/Dalian Yifang\n",
- "found: Tijuana/Tijuana - Monterrey/Monterrey\n",
- "2020-10-23 00:00:00\n",
- "found: Regensburg/Jahn Regensburg - Braunschweig/Braunschweig\n",
- "found: Lyngby BK/Lyngby - OB/OB\n",
- "found: Wuhan/Wuhan Zall - Henan/Henan\n",
- "found: Varaždin/Varazdin - Slaven/Slaven Belupo\n",
- "found: Penafiel/Penafiel - Arouca/Arouca\n",
- "found: Gaz Metan/Gaz Metan Medias - Botoșani/Botosani\n",
- "2020-10-24 00:00:00\n",
- "found: Fulham/Fulham - Crystal Palace/Crystal Palace\n",
- "found: Man Utd/Man Utd - Chelsea/Chelsea\n",
- "found: Barcelona/Barcelona - Real Madrid/Real Madrid\n",
- "found: Osasuna/Osasuna - Athletic/Athletic Bilbao\n",
- "found: Union Berlin/Union Berlin - Freiburg/Freiburg\n",
- "found: Mainz 05/Mainz - M'gladbach/Mgladbach\n",
- "found: Bayern M./Bayern Munich - E. Frankfurt/Eintracht Frankfurt\n",
- "found: RB Leipzig/RB Leipzig - Hertha/Hertha Berlin\n",
- "found: Dortmund/Dortmund - Schalke 04/Schalke 04\n",
- "found: Atalanta/Atalanta - Sampdoria/Sampdoria\n",
- "found: Lorient/Lorient - Marseille/Marseille\n",
- "found: Lokomotiv/Lokomotiv - Rotor/Rotor Volgograd\n",
- "found: Gaziantep/Gaziantep FK - Konyaspor/Konyaspor\n",
- "found: Coventry/Coventry - Blackburn/Blackburn\n",
- "found: Huddersfield/Huddersfield - Preston/Preston\n",
- "found: Reading/Reading - Rotherham/Rotherham\n",
- "found: Stoke/Stoke - Brentford/Brentford\n",
- "found: Cartagena/FC Cartagena - Las Palmas/Las Palmas\n",
- "found: Logroñés/UD Logrones - Lugo/Lugo\n",
- "found: Monza/AC Monza - Chievo/Chievo\n",
- "found: Pordenone/Pordenone - Reggina /Reggina\n",
- "found: Salernitana/Salernitana - Ascoli/Ascoli\n",
- "found: Pescara/Pescara - Frosinone/Frosinone\n",
- "found: Auxerre/Auxerre - Chambly/Chambly Oise\n",
- "found: Châteauroux/Chateauroux - Paris FC/Paris FC\n",
- "found: Troyes/ESTAC Troyes - Valenciennes/Valenciennes\n",
- "found: Austria Wien/Austria Vienna - RB Salzburg/Red Bull Salzburg\n",
- "found: Ried/SV Ried - Sturm/Sturm Graz\n",
- "found: Brøndby IF/Brondby - Midtjylland/Midtjylland\n",
- "found: Häcken/Hacken - Mjällby/Mjallby\n",
- "found: Lugano/Lugano - St. Gallen/St Gallen\n",
- "found: Vaduz/FC Vaduz - Zürich/FC Zurich\n",
- "found: Levski/PFC Levski Sofia - Cherno More/Cherno More\n",
- "found: Hansa Rostock/Hansa Rostock - Viktoria Köln/Viktoria Koln\n",
- "found: Wisła K./Wisla Krakow - TS Podbeskidzie/Podbeskidzie B-B\n",
- "found: Śląsk/Slask Wroclaw - Jagiellonia/Jagiellonia Bialystock\n",
- "found: Porto B/Porto B - Chaves/Chaves\n",
- "found: Benfica B/Benfica B - Académico Viseu/Academico de Viseu\n",
- "found: Pohronie/Pohronie - Senica/FK Senica\n",
- "found: Sereď/SK Sered - Zlaté Moravce/Zlate Moravce\n",
- "found: West Ham/Cheltenham - Man City/Mansfield\n",
- "found: Nashville SC/Nashville SC - New England/New England\n",
- "found: Darmstadt/SV Darmstadt - St. Pauli/St Pauli\n",
- "found: Hannover/Hannover - Fortuna/Fortuna Dusseldorf\n",
- "found: Jiangsu/Jiangsu Suning - Chongqing/Chongqing Lifan\n",
- "found: R&F/Guangzhou R&F - Huanghai/Qingdao Huanghai FC\n",
- "2020-10-25 00:00:00\n",
- "found: Southampton/Southampton - Everton/Everton\n",
- "found: Valladolid/Valladolid - Alavés/Alaves\n",
- "found: Getafe/Getafe - Granada/Granada\n",
- "found: Bremen/Werder Bremen - Hoffenheim/Hoffenheim\n",
- "found: Benevento/Benevento - Napoli/Napoli\n",
- "found: Parma/Parma - Spezia/Spezia\n",
- "found: Montpellier/Montpellier - Reims/Reims\n",
- "found: Brest/Brest - Strasbourg/Strasbourg\n",
- "found: Vitesse/Vitesse Arnhem - PSV/PSV\n",
- "found: Moreirense/Moreirense - Marítimo/Maritimo\n",
- "found: Rostov/Rostov - Khimki/FC Khimki\n",
- "found: Akhmat/Akhmat Grozny - Ufa/FC Ufa\n",
- "found: Alanyaspor/Alanyaspor - Karagümrük/Fatih Karagumruk Istanbul\n",
- "found: Fenerbahçe/Fenerbahce - Trabzonspor/Trabzonspor\n",
- "found: Sporting/Sporting Gijon - Ponferradina/Ponferradina\n",
- "found: Tenerife/Tenerife - Espanyol/Espanyol\n",
- "found: Sandhausen/SV Sandhausen - Paderborn/Paderborn\n",
- "found: Cosenza/Cosenza - Lecce/Lecce\n",
- "found: Randers/Randers - SønderjyskE/SonderjyskE\n",
- "found: Aalesund/Aalesunds - Odd/Odds BK\n",
- "found: Sarpsborg 08/Sarpsborg - Rosenborg/Rosenborg\n",
- "found: Kalmar/Kalmar FF - Falkenberg/Falkenbergs\n",
- "found: Östersund/Ostersunds FK - Hammarby/Hammarby\n",
- "found: Malmö/Malmo FF - Göteborg/IFK Goteborg\n",
- "found: Örebro/Orebro - Helsingborg/Helsingborgs\n",
- "found: Young Boys/Young Boys - Luzern/Luzern\n",
- "found: Saarbrücken/Saarbrucken - Verl/Verl\n",
- "found: Lech/Lech Poznan - Cracovia/Cracovia Krakow\n",
- "found: Oliveirense/Oliveirense - Covilhã/Covilha\n",
- "found: Inhulets/Inhulets Petrove - Mariupol/FK Mariupol\n",
- "found: Kolos Kovalivka/Kolos Kovalyovka - FC Minaj/FC Minaj\n",
- "found: Atl. Mineiro/Atletico MG - Sport Recife/Sport Recife\n",
- "found: Tigres/Tigres - Juárez/FC Juarez\n",
- "found: Mazatlan /Mazatlan FC - Monterrey/Monterrey\n",
- "found: Houston/Houston Dynamo - Columbus/Columbus\n",
- "found: Ankaraspor/Ankaraspor - Balıkesirspor/Balikesirspor\n",
- "found: Varzim/Varzim - Estoril/Estoril Praia\n",
- "2020-10-26 00:00:00\n",
- "found: Brighton/Brighton - West Brom/West Brom\n",
- "found: CSKA/CSKA Moscow - Arsenal/Arsenal Tula\n",
- "found: Alcorcón/Alcorcon - Mallorca/Mallorca\n",
- "found: Boluspor/Boluspor - Menemen/Menemen Belediyespor\n",
- "found: Botev Plovdiv/Botev Plovdiv - Botev Vratsa/Botev Vratsa\n",
- "found: Etar/Etar - Beroe/Beroe Stara Za\n",
- "found: Dalian/Dalian Yifang - Tianjin/Tianjin Teda\n",
- "found: Hebei/Hebei CFFC - Shandong/Shandong Luneng\n",
- "2020-10-27 00:00:00\n",
- "found: Pachuca/Pachuca - Pumas UNAM/Pumas UNAM\n",
- "2020-10-28 00:00:00\n",
- "found: Slaven/Slaven Belupo - Lokomotiva/Lokomotiva\n",
- "found: Wisła K./Wisla Krakow - Lechia/Lechia Gdansk\n",
- "found: Vancouver/Vancouver Whitecaps - Seattle/Seattle Sounders\n",
- "2020-10-29 00:00:00\n",
- "found: Portland/Portland Timbers - LA Galaxy/LA Galaxy\n",
- "found: Los Angeles/Los Angeles FC - Houston/Houston Dynamo\n",
- "found: Alcorcón/Alcorcon - Sporting/Sporting Gijon\n",
- "2020-10-30 00:00:00\n",
- "found: Fortuna/Fortuna Dusseldorf - Heidenheim/FC Heidenheim\n",
- "found: Antofagasta/Antofagasta - U. La Calera/Union La Calera\n",
- "found: Balıkesirspor/Balikesirspor - Adanaspor/Adanaspor\n",
- "found: Cherno More/Cherno More - CSKA 1948/CSKA 1948 Sofia\n",
- "2020-10-31 00:00:00\n",
- "found: Sheffield Utd/Sheff Utd - Man City/Man City\n",
- "found: Real Madrid/Real Madrid - Huesca/Huesca\n",
- "found: Rubin Kazan/Rubin Kazan - Arsenal/Arsenal Tula\n",
- "found: Gençlerbirliği/Genclerbirligi - Gaziantep/Gaziantep FK\n",
- "found: Necaxa/Necaxa - Toluca/Toluca\n",
- "found: Tijuana/Tijuana - Pachuca/Pachuca\n",
- "found: Juárez/FC Juarez - Querétaro/Queretaro\n",
- "found: Bristol City/Bristol City - Norwich/Norwich\n",
- "found: Braunschweig/Braunschweig - Nürnberg/Nurnberg\n",
- "found: Paderborn/Paderborn - Regensburg/Jahn Regensburg\n",
- "found: Cittadella/Cittadella - Monza/AC Monza\n",
- "found: Giresunspor/Giresunspor - Ümraniyespor/Umraniyespor\n",
- "found: Shandong/Shandong Luneng - Hebei/Hebei CFFC\n",
- "found: Covilhã/Covilha - Porto B/Porto B\n",
- "found: Academica C./Academica Clinceni - Sepsi OSK/ACS Sepsi OSK\n",
- "found: Liverpool/Liverpool - West Ham/West Ham\n",
- "found: Osasuna/Osasuna - Atl. Madrid/Atletico Madrid\n",
- "found: Köln/FC Koln - Bayern M./Bayern Munich\n",
- "found: Bielefeld/Arminia Bielefeld - Dortmund/Dortmund\n",
- "found: M'gladbach/Mgladbach - RB Leipzig/RB Leipzig\n",
- "found: Inter/Inter - Parma/Parma\n",
- "found: Rennes/Rennes - Brest/Brest\n",
- "found: Sochi/Sochi - Lokomotiv/Lokomotiv\n",
- "found: Galatasaray/Galatasaray - Ankaragücü/Ankaragucu\n",
- "found: Middlesbrough/Middlesbrough - Forest/Nottm Forest\n",
- "found: Millwall/Millwall - Huddersfield/Huddersfield\n",
- "found: QPR/QPR - Cardiff/Cardiff\n",
- "found: Wycombe/Wycombe - Sheffield Wed/Sheff Wed\n",
- "found: Brescia/Brescia - Entella/Entella\n",
- "found: Chievo/Chievo - Cosenza/Cosenza\n",
- "found: Vicenza/LR Vicenza Virtus - Pisa/Pisa\n",
- "found: Paris FC/Paris FC - Caen/Caen\n",
- "found: St. Pölten/St Polten - Ried/SV Ried\n",
- "found: Huachipato/Huachipato - O'Higgins/OHiggins\n",
- "found: Nordsjælland/FC Nordsjaelland - Midtjylland/Midtjylland\n",
- "found: Beroe/Beroe Stara Za - Arda/Arda\n",
- "found: TPS/TPS - SJK/SJK\n",
- "found: Académica/Academica - Oliveirense/Oliveirense\n",
- "found: Botoșani/Botosani - UTA Arad/UTA Arad\n",
- "found: Pohronie/Pohronie - Zlaté Moravce/Zlate Moravce\n",
- "found: Oleksandria/Oleksandria - Desna/FK Desna Chernihiv\n",
- "found: SK Dnipro-1/Dnipro-1 - Dynamo Kyiv/Dynamo Kiev\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"binds=[]\n",
"d= datetime(2015, 5, 1)\n",
@@ -563,38 +476,38 @@
" for row in df_ss_day.itertuples(index=False):\n",
" home_parts=row.homeTeamShortLow.split(' ')\n",
" away_parts=row.awayTeamShortLow.split(' ')\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, row.awayTeamShortLow, type='00')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, row.awayTeamShortLow, type='00')\n",
" if len(df_bf_slice.index)!=1:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, row.awayTeamShortLow, type='10')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, row.awayTeamShortLow, type='10')\n",
" if len(df_bf_slice.index)!=1:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, row.awayTeamShortLow, type='01')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, row.awayTeamShortLow, type='01')\n",
" if len(df_bf_slice.index)!=1:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, row.awayTeamShortLow, type='11')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, row.awayTeamShortLow, type='11')\n",
" if len(df_bf_slice.index)!=1 and ' ' in row.homeTeamShortLow:\n",
" homeFirst=home_parts[0]\n",
" homeLast=home_parts[-1]\n",
" if len(homeFirst)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeFirst, row.awayTeamShortLow, type='10')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeFirst, row.awayTeamShortLow, type='10')\n",
" if len(df_bf_slice.index)!=1 and len(homeLast)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeLast, row.awayTeamShortLow, type='10')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeLast, row.awayTeamShortLow, type='10')\n",
" if len(df_bf_slice.index)!=1 and ' ' in row.awayTeamShortLow:\n",
" awayFirst=away_parts[0]\n",
" awayLast=away_parts[-1]\n",
" if len(df_bf_slice.index)!=1 and len(awayFirst)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, awayFirst, type='01')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, awayFirst, type='01')\n",
" if len(df_bf_slice.index)!=1 and len(awayLast)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, row.homeTeamShortLow, awayLast, type='01')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, row.homeTeamShortLow, awayLast, type='01')\n",
" if len(df_bf_slice.index)!=1 and ' ' in row.homeTeamShortLow:\n",
" homeFirst=home_parts[0]\n",
" homeLast=home_parts[-1]\n",
" if len(df_bf_slice.index)!=1 and len(homeFirst)>2 and len(awayFirst)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeFirst, awayFirst, type='11')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeFirst, awayFirst, type='11')\n",
" if len(df_bf_slice.index)!=1 and len(homeFirst)>2 and len(awayLast)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeFirst, awayLast, type='11')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeFirst, awayLast, type='11')\n",
" if len(df_bf_slice.index)!=1 and len(awayLast)>2 and len(awayFirst)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeLast, awayFirst, type='11')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeLast, awayFirst, type='11')\n",
" if len(df_bf_slice.index)!=1 and len(homeLast)>2 and len(awayLast)>2:\n",
- " df_bf_slice=slice_df(df_bf_day, row.countryCode, homeLast, awayLast, type='11')\n",
+ " df_bf_slice=slice_df(df_bf_day, row.country, homeLast, awayLast, type='11')\n",
" if len(df_bf_slice.index)==1:\n",
" eventId,home_id,away_id,bf_ht,bf_at=df_bf_slice.iloc[0][['eventId','home_id','away_id','home_name','away_name']]\n",
" print(f'found: {row.homeTeamShort}/{bf_ht} - {row.awayTeamShort}/{bf_at}')\n",
@@ -616,7 +529,478 @@
},
{
"source": [
- "## Teams\n",
+ "## SS - Fbref"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def bind_full(df_source,df_target, ds,de,target,cols, isWide=False):\n",
+ " binds=[]\n",
+ " d= ds\n",
+ " while d
=ds_src) & (df_source['ts']=ds_tgt) & (df_target['ts']2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeFirst, row.awayTeamShortLow, type='10')\n",
+ " if len(df_tgt_slice.index)!=1 and len(homeLast)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeLast, row.awayTeamShortLow, type='10')\n",
+ " if len(df_tgt_slice.index)!=1 and ' ' in row.awayTeamShortLow:\n",
+ " awayFirst=away_parts[0]\n",
+ " awayLast=away_parts[-1]\n",
+ " if len(df_tgt_slice.index)!=1 and len(awayFirst)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, row.homeTeamShortLow, awayFirst, type='01')\n",
+ " if len(df_tgt_slice.index)!=1 and len(awayLast)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, row.homeTeamShortLow, awayLast, type='01')\n",
+ " if len(df_tgt_slice.index)!=1 and ' ' in row.homeTeamShortLow:\n",
+ " homeFirst=home_parts[0]\n",
+ " homeLast=home_parts[-1]\n",
+ " if len(df_tgt_slice.index)!=1 and len(homeFirst)>2 and len(awayFirst)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeFirst, awayFirst, type='11')\n",
+ " if len(df_tgt_slice.index)!=1 and len(homeFirst)>2 and len(awayLast)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeFirst, awayLast, type='11')\n",
+ " if len(df_tgt_slice.index)!=1 and len(awayLast)>2 and len(awayFirst)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeLast, awayFirst, type='11')\n",
+ " if len(df_tgt_slice.index)!=1 and len(homeLast)>2 and len(awayLast)>2:\n",
+ " df_tgt_slice=slice_df(df_tgt, row.country, homeLast, awayLast, type='11')\n",
+ " if len(df_tgt_slice.index)==1:\n",
+ " if len(cols)==3:\n",
+ " mid,home_team,away_team=df_tgt_slice.iloc[0][cols]\n",
+ " else:\n",
+ " mid,home_id,away_id,home_team,away_team=df_tgt_slice.iloc[0][cols]\n",
+ "\n",
+ " #print(f'found: {row.homeTeamShort}/{home_team} - {row.awayTeamShort}/{away_team}')\n",
+ " binds.append({\n",
+ " 'target_home_name':home_team,\n",
+ " 'target_away_name':away_team,\n",
+ " 'target_home_id':home_id if len(cols)>3 else home_team,\n",
+ " 'target_away_id':away_id if len(cols)>3 else away_team,\n",
+ " 'target_mid':mid,\n",
+ " 'ss_home':row.homeTeamShort,\n",
+ " 'ss_away':row.awayTeamShort,\n",
+ " 'ss_id':row.id\n",
+ " })\n",
+ " #\n",
+ " d+=timedelta(days=1)\n",
+ " df_binds=pd.DataFrame(binds)\n",
+ " df_binds.to_csv(f'data/binds_ss_{target}.csv', index=False)\n",
+ " return df_binds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "NameError",
+ "evalue": "name 'df_fbref' is not defined",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf_binds\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbind_full\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_ss\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf_fbref\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2015\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m2020\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m12\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'fbref'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mid'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'tid1'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'tid2'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'team1'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'team2'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m: name 'df_fbref' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "df_binds=bind_full(df_ss,df_fbref, datetime(2015, 1, 1), datetime(2020, 12, 1),'fbref',['mid','tid1','tid2','team1','team2'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def full_semi_none(df_binds, df_ss, ds):\n",
+ " df_binds.loc[df_binds['target_home_id'].isna(),'target_home_id']=''\n",
+ " df_binds.loc[df_binds['target_away_id'].isna(),'target_away_id']=''\n",
+ " ds = pytz.timezone(local_tz).localize(ds)\n",
+ " ss=df_ss.loc[df_ss['ts']>=ds]\n",
+ "\n",
+ " #df_binds=pd.read_csv('data/binds.csv', index_col=None)\n",
+ "\n",
+ " df_ss_binded=ss.merge(df_binds, left_on=['id','homeTeamShort', 'awayTeamShort'] , right_on=['ss_id','ss_home','ss_away'], how='inner')\n",
+ " df_ss_binded=df_ss_binded.drop_duplicates()\n",
+ " df_semi = ss.loc[~(ss['id'].isin(df_ss_binded['id']))]\n",
+ " df_semi=df_semi.merge(df_binds[['target_home_name', 'target_home_id', 'ss_home']].drop_duplicates(), left_on=['homeTeamShort'] , right_on=['ss_home'], how='left')\n",
+ " df_semi=df_semi.merge(df_binds[['target_away_name', 'target_away_id', 'ss_away']].drop_duplicates(), left_on=['awayTeamShort'] , right_on=['ss_away'], how='left')\n",
+ " df_not=df_semi.loc[(df_semi['target_home_id'].isna()) & (df_semi['target_away_id'].isna())]\n",
+ " df_semi = df_semi.loc[~(df_semi['id'].isin(df_not['id']))]\n",
+ " df_semi=df_semi.drop_duplicates()\n",
+ " df_semi.loc[df_semi['target_home_id'].isna(),'target_home_id']=''\n",
+ " df_semi.loc[df_semi['target_away_id'].isna(),'target_away_id']=''\n",
+ " print(' full: {}, semi: {}, non: {}'.format(len(df_ss_binded.index),len(df_semi.index),len(df_not.index)) )\n",
+ " return df_ss_binded,df_semi,df_not\n",
+ "\n",
+ "def bind_semi(df_semi,df_target,cols, isWide=False):\n",
+ " binds=[]\n",
+ " binded_total=0\n",
+ " for row in df_semi.itertuples(index=False):\n",
+ " d=row.ts.replace(tzinfo=None)\n",
+ " ds_tgt=d-timedelta(days=1) if isWide else d\n",
+ " de_tgt=d+timedelta(days=2) if isWide else d+timedelta(days=2)\n",
+ " if row.target_home_id!='':\n",
+ " #print(0,row.target_home_id)\n",
+ " \n",
+ " df_target_slice=df_target[(df_target['ts']>=ds_tgt) & (df_target['ts']=ds_tgt) & (df_target['ts']\n\n\n \n \n \n awayTeam \n homeScoreHT \n homeTeam \n ts \n awayScoreFT \n status \n winnerCode \n country \n done \n awayTeamShort \n ... \n awayScoreET \n countryCode \n homeTeamShortLow \n awayTeamShortLow \n target_home_name \n target_home_id \n ss_home \n target_away_name \n target_away_id \n ss_away \n \n \n \n \n 0 \n Queens Park Rangers \n 2.0 \n Burnley \n 2015-01-10 15:00:00+00:00 \n 1.0 \n 100.0 \n 1.0 \n england \n 1 \n QPR \n ... \n 1.0 \n GB \n burnley \n qpr \n Burnley \n Burnley \n Burnley \n QPR \n QPR \n QPR \n \n \n 1 \n Queens Park Rangers \n 2.0 \n Burnley \n 2015-01-10 15:00:00+00:00 \n 1.0 \n 100.0 \n 1.0 \n england \n 1 \n QPR \n ... \n 1.0 \n GB \n burnley \n qpr \n Burnley U23 \n Burnley U23 \n Burnley \n QPR \n QPR \n QPR \n \n \n 2 \n Newcastle United \n 1.0 \n Chelsea \n 2015-01-10 15:00:00+00:00 \n 0.0 \n 100.0 \n 1.0 \n england \n 1 \n Newcastle \n ... \n 0.0 \n GB \n chelsea \n newcastle \n Chelsea \n Chelsea \n Chelsea \n Newcastle \n Newcastle \n Newcastle \n \n \n 3 \n Manchester City \n 0.0 \n Everton \n 2015-01-10 15:00:00+00:00 \n 1.0 \n 100.0 \n 3.0 \n england \n 1 \n Man City \n ... \n 1.0 \n GB \n everton \n man city \n Everton \n Everton \n Everton \n Manchester City \n Manchester City \n Man City \n \n \n 4 \n Manchester City \n 0.0 \n Everton \n 2015-01-10 15:00:00+00:00 \n 1.0 \n 100.0 \n 3.0 \n england \n 1 \n Man City \n ... \n 1.0 \n GB \n everton \n man city \n Everton \n Everton \n Everton \n Stoke City U23 \n Stoke City U23 \n Man City \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 50392 \n Trelleborgs FF \n 0.0 \n Jönköpings Södra \n 2020-11-28 14:00:00+00:00 \n 1.0 \n 100.0 \n 1.0 \n sweden \n 1 \n Trelleborg \n ... 
\n 1.0 \n SE \n jsodra \n trelleborg \n NaN \n \n NaN \n Trelleborgs \n Trelleborgs \n Trelleborg \n \n \n 50393 \n Västerås SK \n 0.0 \n Umeå \n 2020-11-28 14:00:00+00:00 \n 0.0 \n 100.0 \n 3.0 \n sweden \n 1 \n Västerås \n ... \n 0.0 \n SE \n umea \n vasteras \n Umea FC \n Umea FC \n Umeå \n Vasteras SK \n Vasteras SK \n Västerås \n \n \n 50394 \n Ljungskile SK \n 0.0 \n Östers IF \n 2020-11-28 14:00:00+00:00 \n 1.0 \n 100.0 \n 2.0 \n sweden \n 1 \n Ljungskile \n ... \n 1.0 \n SE \n oster \n ljungskile \n Osters \n Osters \n Öster \n Ljungskile \n Ljungskile \n Ljungskile \n \n \n 50395 \n Vorskla Poltava \n 2.0 \n Dynamo Kyiv \n 2020-11-28 15:00:00+00:00 \n 0.0 \n 100.0 \n 1.0 \n ukraine \n 1 \n Vorskla \n ... \n 0.0 \n UA \n dynamo kyiv \n vorskla \n Dyn. Kyiv \n Dyn. Kyiv \n Dynamo Kyiv \n Vorskla Poltava \n Vorskla Poltava \n Vorskla \n \n \n 50396 \n Shakhtar Donetsk \n 0.0 \n SK Dnipro-1 \n 2020-11-28 17:30:00+00:00 \n 1.0 \n 100.0 \n 2.0 \n ukraine \n 1 \n Shakhtar D. \n ... \n 1.0 \n UA \n sk dnipro1 \n shakhtar d \n Dnipro-1 \n Dnipro-1 \n SK Dnipro-1 \n Shakhtar Donetsk \n Shakhtar Donetsk \n Shakhtar D. \n \n \n
\n38317 rows × 29 columns
\n "
+ },
+ "metadata": {},
+ "execution_count": 88
+ }
+ ],
+ "source": [
+ "df_semi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Done! 5938 matches binded\n",
+ " full: 34082, semi: 51263, non: 7560\n",
+ "Done! 960 matches binded\n",
+ " full: 34738, semi: 80449, non: 6440\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_binds_from_semi=bind_semi(df_semi,df_op,['link','t1','t2','t1','t2'])\n",
+ "df_binds=pd.concat([df_binds,df_binds_from_semi], axis=0).drop_duplicates(subset=['ss_id','target_mid'])\n",
+ "df_binds.to_csv('data/binds_ss_op1.csv', index=False)\n",
+ "\n",
+ "df_ss_binded,df_semi,df_not=full_semi_none(df_binds, df_ss, datetime(2015, 1, 1))\n",
+ "df_binds_from_semi=bind_semi(df_semi,df_op,['link','t1','t2','t1','t2'],isWide=True)\n",
+ "df_binds=pd.concat([df_binds,df_binds_from_semi], axis=0).drop_duplicates(subset=['ss_id','target_mid'])\n",
+ "df_binds.to_csv('data/binds_ss_op2.csv', index=False)\n",
+ "\n",
+ "df_ss_binded,df_semi,df_not=full_semi_none(df_binds, df_ss, datetime(2015, 1, 1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ds country liga \\\n",
+ "0 19/02/16 15:00 england national-league-2018-2019 \n",
+ "1 14/03/25 19:00 germany bundesliga-2013-2014 \n",
+ "2 16/07/14 00:45 ecuador serie-a-2016 \n",
+ "3 17/04/08 14:00 england league-two-2016-2017 \n",
+ "4 16/09/20 15:00 iran persian-gulf-pro-league-2016-2017 \n",
+ "... ... ... ... \n",
+ "145372 18/12/01 14:30 germany bundesliga-2018-2019 \n",
+ "145373 19/12/08 16:30 croatia 1-hnl-2019-2020 \n",
+ "145374 16/08/14 20:30 peru primera-division-2016 \n",
+ "145375 10/11/20 15:00 england league-one-2010-2011 \n",
+ "145376 14/10/15 21:10 argentina primera-division-2014 \n",
+ "\n",
+ " season t1 t2 sc1 sc2 odds1 \\\n",
+ "0 2018/2019 Boreham Wood Hartlepool 0 4 2.29 \n",
+ "1 2013/2014 Dortmund Schalke 0 0 1.46 \n",
+ "2 2016 Barcelona SC Guayaquil City 2 0 1.31 \n",
+ "3 2016/2017 Cambridge Utd Leyton Orient 3 0 1.42 \n",
+ "4 2016/2017 Esteghlal F.C. Zob Ahan 2 1 1.9 \n",
+ "... ... ... ... ... ... ... \n",
+ "145372 2018/2019 Stuttgart Augsburg 1 0 2.66 \n",
+ "145373 2019/2020 Rijeka Istra 1961 2 0 1.22 \n",
+ "145374 2016 Alianza Lima Comerciantes Unidos 0 0 1.51 \n",
+ "145375 2010/2011 Brighton Bristol Rovers 2 2 1.45 \n",
+ "145376 2014 Godoy Cruz Tigre 4 3 2.16 \n",
+ "\n",
+ " oddsdraw odds2 bn \\\n",
+ "0 3.26 3.09 12 \n",
+ "1 4.82 6.42 7 \n",
+ "2 5.15 8.16 9 \n",
+ "3 4.53 7.6 11 \n",
+ "4 2.94 4.59 9 \n",
+ "... ... ... .. \n",
+ "145372 3.39 2.71 12 \n",
+ "145373 5.89 12.67 14 \n",
+ "145374 3.92 6.05 9 \n",
+ "145375 4.34 6.56 6 \n",
+ "145376 3.17 3.5 8 \n",
+ "\n",
+ " link done \\\n",
+ "0 /soccer/england/national-league-2018-2019/bore... 1 \n",
+ "1 /soccer/germany/bundesliga-2013-2014/dortmund-... 1 \n",
+ "2 /soccer/ecuador/serie-a-2016/barcelona-sc-guay... 1 \n",
+ "3 /soccer/england/league-two-2016-2017/cambridge... 1 \n",
+ "4 /soccer/iran/persian-gulf-pro-league-2016-2017... 1 \n",
+ "... ... ... \n",
+ "145372 /soccer/germany/bundesliga-2018-2019/vfb-stutt... 0 \n",
+ "145373 /soccer/croatia/1-hnl-2019-2020/rijeka-istra-1... 0 \n",
+ "145374 /soccer/peru/primera-division-2016/a-lima-come... 0 \n",
+ "145375 /soccer/england/league-one-2010-2011/brighton-... 0 \n",
+ "145376 /soccer/argentina/primera-division-2014/godoy-... 0 \n",
+ "\n",
+ " home_name_low away_name_low ts \n",
+ "0 boreham wood hartlepool 2019-02-16 15:00:00 \n",
+ "1 dortmund schalke 2014-03-25 19:00:00 \n",
+ "2 barcelona sc guayaquil city 2016-07-14 00:45:00 \n",
+ "3 cambridge utd leyton orient 2017-04-08 14:00:00 \n",
+ "4 esteghlal fc zob ahan 2016-09-20 15:00:00 \n",
+ "... ... ... ... \n",
+ "145372 stuttgart augsburg 2018-12-01 14:30:00 \n",
+ "145373 rijeka istra 1961 2019-12-08 16:30:00 \n",
+ "145374 alianza lima comerciantes unidos 2016-08-14 20:30:00 \n",
+ "145375 brighton bristol rovers 2010-11-20 15:00:00 \n",
+ "145376 godoy cruz tigre 2014-10-15 21:10:00 \n",
+ "\n",
+ "[145377 rows x 17 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n ds \n country \n liga \n season \n t1 \n t2 \n sc1 \n sc2 \n odds1 \n oddsdraw \n odds2 \n bn \n link \n done \n home_name_low \n away_name_low \n ts \n \n \n \n \n 0 \n 19/02/16 15:00 \n england \n national-league-2018-2019 \n 2018/2019 \n Boreham Wood \n Hartlepool \n 0 \n 4 \n 2.29 \n 3.26 \n 3.09 \n 12 \n /soccer/england/national-league-2018-2019/bore... \n 1 \n boreham wood \n hartlepool \n 2019-02-16 15:00:00 \n \n \n 1 \n 14/03/25 19:00 \n germany \n bundesliga-2013-2014 \n 2013/2014 \n Dortmund \n Schalke \n 0 \n 0 \n 1.46 \n 4.82 \n 6.42 \n 7 \n /soccer/germany/bundesliga-2013-2014/dortmund-... \n 1 \n dortmund \n schalke \n 2014-03-25 19:00:00 \n \n \n 2 \n 16/07/14 00:45 \n ecuador \n serie-a-2016 \n 2016 \n Barcelona SC \n Guayaquil City \n 2 \n 0 \n 1.31 \n 5.15 \n 8.16 \n 9 \n /soccer/ecuador/serie-a-2016/barcelona-sc-guay... \n 1 \n barcelona sc \n guayaquil city \n 2016-07-14 00:45:00 \n \n \n 3 \n 17/04/08 14:00 \n england \n league-two-2016-2017 \n 2016/2017 \n Cambridge Utd \n Leyton Orient \n 3 \n 0 \n 1.42 \n 4.53 \n 7.6 \n 11 \n /soccer/england/league-two-2016-2017/cambridge... \n 1 \n cambridge utd \n leyton orient \n 2017-04-08 14:00:00 \n \n \n 4 \n 16/09/20 15:00 \n iran \n persian-gulf-pro-league-2016-2017 \n 2016/2017 \n Esteghlal F.C. \n Zob Ahan \n 2 \n 1 \n 1.9 \n 2.94 \n 4.59 \n 9 \n /soccer/iran/persian-gulf-pro-league-2016-2017... \n 1 \n esteghlal fc \n zob ahan \n 2016-09-20 15:00:00 \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 145372 \n 18/12/01 14:30 \n germany \n bundesliga-2018-2019 \n 2018/2019 \n Stuttgart \n Augsburg \n 1 \n 0 \n 2.66 \n 3.39 \n 2.71 \n 12 \n /soccer/germany/bundesliga-2018-2019/vfb-stutt... 
\n 0 \n stuttgart \n augsburg \n 2018-12-01 14:30:00 \n \n \n 145373 \n 19/12/08 16:30 \n croatia \n 1-hnl-2019-2020 \n 2019/2020 \n Rijeka \n Istra 1961 \n 2 \n 0 \n 1.22 \n 5.89 \n 12.67 \n 14 \n /soccer/croatia/1-hnl-2019-2020/rijeka-istra-1... \n 0 \n rijeka \n istra 1961 \n 2019-12-08 16:30:00 \n \n \n 145374 \n 16/08/14 20:30 \n peru \n primera-division-2016 \n 2016 \n Alianza Lima \n Comerciantes Unidos \n 0 \n 0 \n 1.51 \n 3.92 \n 6.05 \n 9 \n /soccer/peru/primera-division-2016/a-lima-come... \n 0 \n alianza lima \n comerciantes unidos \n 2016-08-14 20:30:00 \n \n \n 145375 \n 10/11/20 15:00 \n england \n league-one-2010-2011 \n 2010/2011 \n Brighton \n Bristol Rovers \n 2 \n 2 \n 1.45 \n 4.34 \n 6.56 \n 6 \n /soccer/england/league-one-2010-2011/brighton-... \n 0 \n brighton \n bristol rovers \n 2010-11-20 15:00:00 \n \n \n 145376 \n 14/10/15 21:10 \n argentina \n primera-division-2014 \n 2014 \n Godoy Cruz \n Tigre \n 4 \n 3 \n 2.16 \n 3.17 \n 3.5 \n 8 \n /soccer/argentina/primera-division-2014/godoy-... \n 0 \n godoy cruz \n tigre \n 2014-10-15 21:10:00 \n \n \n
\n
145377 rows × 17 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 65
+ }
+ ],
+ "source": [
+ "df_op"
+ ]
+ },
+ {
+ "source": [
+ "# Teams\n",
"Extract teams with Unicode "
],
"cell_type": "markdown",
@@ -624,7 +1008,7 @@
},
{
"cell_type": "code",
- "execution_count": 101,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -648,47 +1032,9 @@
},
{
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- " Team TeamShort country \\\n",
- "0 Málaga Málaga spain \n",
- "1 Académica Coimbra Académica portugal \n",
- "2 Club América América mexico \n",
- "3 Almería Almería spain \n",
- "4 Nîmes Olympique Nîmes france \n",
- ".. ... ... ... \n",
- "881 Colón de Santa Fe Colón argentina \n",
- "882 Nõmme Kalju Kalju estonia \n",
- "883 Umeå Umeå sweden \n",
- "884 Lillestrøm SK Lillestrøm norway \n",
- "885 MFK Ružomberok Ružomberok slovakia \n",
- "\n",
- " tournament TeamEn TeamShortEn \n",
- "0 laliga Malaga Malaga \n",
- "1 primeira-liga Academica Coimbra Academica \n",
- "2 liga-mx-clausura Club America America \n",
- "3 laliga Almeria Almeria \n",
- "4 ligue-2 Nimes Olympique Nimes \n",
- ".. ... ... ... \n",
- "881 copa-de-la-liga-profesional-group-2 NaN NaN \n",
- "882 premium-liiga NaN NaN \n",
- "883 superettan NaN NaN \n",
- "884 1st-division NaN NaN \n",
- "885 slovensky-pohar NaN NaN \n",
- "\n",
- "[886 rows x 6 columns]"
- ],
- "text/html": "
\n\n
\n \n \n \n Team \n TeamShort \n country \n tournament \n TeamEn \n TeamShortEn \n \n \n \n \n 0 \n Málaga \n Málaga \n spain \n laliga \n Malaga \n Malaga \n \n \n 1 \n Académica Coimbra \n Académica \n portugal \n primeira-liga \n Academica Coimbra \n Academica \n \n \n 2 \n Club América \n América \n mexico \n liga-mx-clausura \n Club America \n America \n \n \n 3 \n Almería \n Almería \n spain \n laliga \n Almeria \n Almeria \n \n \n 4 \n Nîmes Olympique \n Nîmes \n france \n ligue-2 \n Nimes Olympique \n Nimes \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 881 \n Colón de Santa Fe \n Colón \n argentina \n copa-de-la-liga-profesional-group-2 \n NaN \n NaN \n \n \n 882 \n Nõmme Kalju \n Kalju \n estonia \n premium-liiga \n NaN \n NaN \n \n \n 883 \n Umeå \n Umeå \n sweden \n superettan \n NaN \n NaN \n \n \n 884 \n Lillestrøm SK \n Lillestrøm \n norway \n 1st-division \n NaN \n NaN \n \n \n 885 \n MFK Ružomberok \n Ružomberok \n slovakia \n slovensky-pohar \n NaN \n NaN \n \n \n
\n
886 rows × 6 columns
\n
"
- },
- "metadata": {},
- "execution_count": 102
- }
- ],
+ "outputs": [],
"source": [
"df_teams"
]
@@ -702,17 +1048,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- " full: 52256, semi: 46435, non: 2402\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"ds = pytz.timezone(local_tz).localize(datetime(2015, 5, 1))\n",
"ss=df_ss.loc[df_ss['ts']>=ds]\n",
@@ -734,19 +1072,11 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"tags": []
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Done! 0 matches binded\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"binds=[]\n",
"binded_total=0\n",
@@ -795,7 +1125,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -804,7 +1134,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -813,93 +1143,16 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- " awayTeam homeScoreHT homeTeam \\\n",
- "0 Derby County 0.0 Ipswich Town \n",
- "1 Stade Brestois 29 1.0 Troyes \n",
- "2 AEL Kalloni 1.0 Veria NFC \n",
- "3 Queens Park Rangers 2.0 Burnley \n",
- "4 Newcastle United 1.0 Chelsea \n",
- "... ... ... ... \n",
- "53991 Colorado Rapids 1.0 Chicago Fire \n",
- "53992 Impact Montreal 2.0 Philadelphia Union \n",
- "53993 FC Luzern 0.0 FC St. Gallen 1879 \n",
- "53994 FC Thun 0.0 Grasshopper Club Zürich \n",
- "53995 Botev Plovdiv 2.0 Ludogorets Razgrad \n",
- "\n",
- " ts awayScoreFT status winnerCode country \\\n",
- "0 2015-01-10 12:15:00+00:00 1.0 100 2 england \n",
- "1 2015-01-10 13:00:00+00:00 0.0 100 1 france \n",
- "2 2015-01-10 13:00:00+00:00 1.0 100 3 greece \n",
- "3 2015-01-10 15:00:00+00:00 1.0 100 1 england \n",
- "4 2015-01-10 15:00:00+00:00 0.0 100 1 england \n",
- "... ... ... ... ... ... \n",
- "53991 2019-04-20 17:00:00+00:00 1.0 100 1 usa \n",
- "53992 2019-04-20 17:00:00+00:00 0.0 100 1 usa \n",
- "53993 2019-04-20 17:00:00+00:00 2.0 100 2 switzerland \n",
- "53994 2019-04-20 17:00:00+00:00 1.0 100 3 switzerland \n",
- "53995 2019-04-20 17:00:00+00:00 0.0 100 1 bulgaria \n",
- "\n",
- " done awayTeamShort ... tournament round \\\n",
- "0 1 Derby ... championship 25.0 \n",
- "1 1 Brest ... ligue-2 19.0 \n",
- "2 1 Kalloni ... super-league 11.0 \n",
- "3 1 QPR ... premier-league 21.0 \n",
- "4 1 Newcastle ... premier-league 21.0 \n",
- "... ... ... ... ... ... \n",
- "53991 1 Colorado ... major-league-soccer NaN \n",
- "53992 1 Montreal ... major-league-soccer NaN \n",
- "53993 1 Luzern ... super-league 30.0 \n",
- "53994 1 Thun ... super-league 30.0 \n",
- "53995 1 Botev Plovdiv ... parva-liga-championship-round 30.0 \n",
- "\n",
- " coverage awayScoreHT homeScoreFT startTimestamp awayScoreET \\\n",
- "0 1 0.0 0.0 1420892100 1 \n",
- "1 1 0.0 1.0 1420894800 0 \n",
- "2 1 1.0 1.0 1420894800 1 \n",
- "3 1 1.0 2.0 1420902000 1 \n",
- "4 1 0.0 2.0 1420902000 0 \n",
- "... ... ... ... ... ... \n",
- "53991 1 1.0 4.0 1555779600 1 \n",
- "53992 1 0.0 3.0 1555779600 0 \n",
- "53993 1 1.0 1.0 1555779600 2 \n",
- "53994 1 0.0 1.0 1555779600 1 \n",
- "53995 1 0.0 3.0 1555779600 0 \n",
- "\n",
- " countryCode homeTeamShortLow awayTeamShortLow \n",
- "0 GB ipswich derby \n",
- "1 FR troyes brest \n",
- "2 GR veria kalloni \n",
- "3 GB burnley qpr \n",
- "4 GB chelsea newcastle \n",
- "... ... ... ... \n",
- "53991 US chicago fire colorado \n",
- "53992 US philadelphia montreal \n",
- "53993 CH st. gallen luzern \n",
- "53994 CH grasshoppers thun \n",
- "53995 BG ludogorets botev plovdiv \n",
- "\n",
- "[53996 rows x 23 columns]"
- ],
- "text/html": "
\n\n
\n \n \n \n awayTeam \n homeScoreHT \n homeTeam \n ts \n awayScoreFT \n status \n winnerCode \n country \n done \n awayTeamShort \n ... \n tournament \n round \n coverage \n awayScoreHT \n homeScoreFT \n startTimestamp \n awayScoreET \n countryCode \n homeTeamShortLow \n awayTeamShortLow \n \n \n \n \n 0 \n Derby County \n 0.0 \n Ipswich Town \n 2015-01-10 12:15:00+00:00 \n 1.0 \n 100 \n 2 \n england \n 1 \n Derby \n ... \n championship \n 25.0 \n 1 \n 0.0 \n 0.0 \n 1420892100 \n 1 \n GB \n ipswich \n derby \n \n \n 1 \n Stade Brestois 29 \n 1.0 \n Troyes \n 2015-01-10 13:00:00+00:00 \n 0.0 \n 100 \n 1 \n france \n 1 \n Brest \n ... \n ligue-2 \n 19.0 \n 1 \n 0.0 \n 1.0 \n 1420894800 \n 0 \n FR \n troyes \n brest \n \n \n 2 \n AEL Kalloni \n 1.0 \n Veria NFC \n 2015-01-10 13:00:00+00:00 \n 1.0 \n 100 \n 3 \n greece \n 1 \n Kalloni \n ... \n super-league \n 11.0 \n 1 \n 1.0 \n 1.0 \n 1420894800 \n 1 \n GR \n veria \n kalloni \n \n \n 3 \n Queens Park Rangers \n 2.0 \n Burnley \n 2015-01-10 15:00:00+00:00 \n 1.0 \n 100 \n 1 \n england \n 1 \n QPR \n ... \n premier-league \n 21.0 \n 1 \n 1.0 \n 2.0 \n 1420902000 \n 1 \n GB \n burnley \n qpr \n \n \n 4 \n Newcastle United \n 1.0 \n Chelsea \n 2015-01-10 15:00:00+00:00 \n 0.0 \n 100 \n 1 \n england \n 1 \n Newcastle \n ... \n premier-league \n 21.0 \n 1 \n 0.0 \n 2.0 \n 1420902000 \n 0 \n GB \n chelsea \n newcastle \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 53991 \n Colorado Rapids \n 1.0 \n Chicago Fire \n 2019-04-20 17:00:00+00:00 \n 1.0 \n 100 \n 1 \n usa \n 1 \n Colorado \n ... \n major-league-soccer \n NaN \n 1 \n 1.0 \n 4.0 \n 1555779600 \n 1 \n US \n chicago fire \n colorado \n \n \n 53992 \n Impact Montreal \n 2.0 \n Philadelphia Union \n 2019-04-20 17:00:00+00:00 \n 0.0 \n 100 \n 1 \n usa \n 1 \n Montreal \n ... 
\n major-league-soccer \n NaN \n 1 \n 0.0 \n 3.0 \n 1555779600 \n 0 \n US \n philadelphia \n montreal \n \n \n 53993 \n FC Luzern \n 0.0 \n FC St. Gallen 1879 \n 2019-04-20 17:00:00+00:00 \n 2.0 \n 100 \n 2 \n switzerland \n 1 \n Luzern \n ... \n super-league \n 30.0 \n 1 \n 1.0 \n 1.0 \n 1555779600 \n 2 \n CH \n st. gallen \n luzern \n \n \n 53994 \n FC Thun \n 0.0 \n Grasshopper Club Zürich \n 2019-04-20 17:00:00+00:00 \n 1.0 \n 100 \n 3 \n switzerland \n 1 \n Thun \n ... \n super-league \n 30.0 \n 1 \n 0.0 \n 1.0 \n 1555779600 \n 1 \n CH \n grasshoppers \n thun \n \n \n 53995 \n Botev Plovdiv \n 2.0 \n Ludogorets Razgrad \n 2019-04-20 17:00:00+00:00 \n 0.0 \n 100 \n 1 \n bulgaria \n 1 \n Botev Plovdiv \n ... \n parva-liga-championship-round \n 30.0 \n 1 \n 0.0 \n 3.0 \n 1555779600 \n 0 \n BG \n ludogorets \n botev plovdiv \n \n \n
\n
53996 rows × 23 columns
\n
"
- },
- "metadata": {},
- "execution_count": 16
- }
- ],
+ "outputs": [],
"source": [
"df_ss"
]
},
{
"cell_type": "code",
- "execution_count": 118,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -908,7 +1161,7 @@
},
{
"cell_type": "code",
- "execution_count": 56,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -917,7 +1170,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -926,645 +1179,13 @@
},
{
"cell_type": "code",
- "execution_count": 144,
+ "execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "data/bf\\2018-Oct-16_matches.csv\n",
- "data/bf\\2018-Oct-17_matches.csv\n",
- "data/bf\\2018-Oct-18_matches.csv\n",
- "data/bf\\2018-Oct-19_matches.csv\n",
- "data/bf\\2018-Oct-20_matches.csv\n",
- "data/bf\\2018-Oct-21_matches.csv\n",
- "data/bf\\2018-Oct-22_matches.csv\n",
- "data/bf\\2018-Oct-23_matches.csv\n",
- "data/bf\\2018-Oct-24_matches.csv\n",
- "data/bf\\2018-Oct-25_matches.csv\n",
- "data/bf\\2018-Oct-26_matches.csv\n",
- "data/bf\\2018-Oct-27_matches.csv\n",
- "data/bf\\2018-Oct-28_matches.csv\n",
- "data/bf\\2018-Oct-29_matches.csv\n",
- "data/bf\\2018-Oct-30_matches.csv\n",
- "data/bf\\2018-Oct-31_matches.csv\n",
- "data/bf\\2018-Sep-01_matches.csv\n",
- "data/bf\\2018-Sep-02_matches.csv\n",
- "data/bf\\2018-Sep-03_matches.csv\n",
- "data/bf\\2018-Sep-04_matches.csv\n",
- "data/bf\\2018-Sep-05_matches.csv\n",
- "data/bf\\2018-Sep-06_matches.csv\n",
- "data/bf\\2018-Sep-07_matches.csv\n",
- "data/bf\\2018-Sep-08_matches.csv\n",
- "data/bf\\2018-Sep-09_matches.csv\n",
- "data/bf\\2018-Sep-10_matches.csv\n",
- "data/bf\\2018-Sep-11_matches.csv\n",
- "data/bf\\2018-Sep-12_matches.csv\n",
- "data/bf\\2018-Sep-13_matches.csv\n",
- "data/bf\\2018-Sep-14_matches.csv\n",
- "data/bf\\2018-Sep-15_matches.csv\n",
- "data/bf\\2018-Sep-17_matches.csv\n",
- "data/bf\\2018-Sep-18_matches.csv\n",
- "data/bf\\2018-Sep-19_matches.csv\n",
- "data/bf\\2018-Sep-20_matches.csv\n",
- "data/bf\\2018-Sep-21_matches.csv\n",
- "data/bf\\2018-Sep-22_matches.csv\n",
- "data/bf\\2018-Sep-23_matches.csv\n",
- "data/bf\\2018-Sep-24_matches.csv\n",
- "data/bf\\2018-Sep-25_matches.csv\n",
- "data/bf\\2018-Sep-26_matches.csv\n",
- "data/bf\\2018-Sep-27_matches.csv\n",
- "data/bf\\2018-Sep-28_matches.csv\n",
- "data/bf\\2018-Sep-29_matches.csv\n",
- "data/bf\\2018-Sep-30_matches.csv\n",
- "data/bf\\2019-Apr-01_matches.csv\n",
- "data/bf\\2019-Apr-02_matches.csv\n",
- "data/bf\\2019-Apr-03_matches.csv\n",
- "data/bf\\2019-Apr-04_matches.csv\n",
- "data/bf\\2019-Apr-05_matches.csv\n",
- "data/bf\\2019-Apr-06_matches.csv\n",
- "data/bf\\2019-Apr-07_matches.csv\n",
- "data/bf\\2019-Apr-08_matches.csv\n",
- "data/bf\\2019-Apr-09_matches.csv\n",
- "data/bf\\2019-Apr-10_matches.csv\n",
- "data/bf\\2019-Apr-11_matches.csv\n",
- "data/bf\\2019-Apr-12_matches.csv\n",
- "data/bf\\2019-Apr-13_matches.csv\n",
- "data/bf\\2019-Apr-14_matches.csv\n",
- "data/bf\\2019-Apr-15_matches.csv\n",
- "data/bf\\2019-Apr-16_matches.csv\n",
- "data/bf\\2019-Apr-17_matches.csv\n",
- "data/bf\\2019-Apr-18_matches.csv\n",
- "data/bf\\2019-Apr-19_matches.csv\n",
- "data/bf\\2019-Apr-20_matches.csv\n",
- "data/bf\\2019-Apr-21_matches.csv\n",
- "data/bf\\2019-Apr-22_matches.csv\n",
- "data/bf\\2019-Apr-23_matches.csv\n",
- "data/bf\\2019-Apr-24_matches.csv\n",
- "data/bf\\2019-Apr-25_matches.csv\n",
- "data/bf\\2019-Apr-26_matches.csv\n",
- "data/bf\\2019-Apr-27_matches.csv\n",
- "data/bf\\2019-Apr-28_matches.csv\n",
- "data/bf\\2019-Apr-29_matches.csv\n",
- "data/bf\\2019-Apr-30_matches.csv\n",
- "data/bf\\2019-Aug-01_matches.csv\n",
- "data/bf\\2019-Aug-02_matches.csv\n",
- "data/bf\\2019-Aug-03_matches.csv\n",
- "data/bf\\2019-Aug-04_matches.csv\n",
- "data/bf\\2019-Aug-05_matches.csv\n",
- "data/bf\\2019-Aug-06_matches.csv\n",
- "data/bf\\2019-Aug-07_matches.csv\n",
- "data/bf\\2019-Aug-08_matches.csv\n",
- "data/bf\\2019-Aug-09_matches.csv\n",
- "data/bf\\2019-Aug-10_matches.csv\n",
- "data/bf\\2019-Aug-11_matches.csv\n",
- "data/bf\\2019-Aug-12_matches.csv\n",
- "data/bf\\2019-Aug-13_matches.csv\n",
- "data/bf\\2019-Aug-14_matches.csv\n",
- "data/bf\\2019-Aug-15_matches.csv\n",
- "data/bf\\2019-Aug-16_matches.csv\n",
- "data/bf\\2019-Aug-17_matches.csv\n",
- "data/bf\\2019-Aug-18_matches.csv\n",
- "data/bf\\2019-Aug-19_matches.csv\n",
- "data/bf\\2019-Aug-20_matches.csv\n",
- "data/bf\\2019-Aug-21_matches.csv\n",
- "data/bf\\2019-Aug-22_matches.csv\n",
- "data/bf\\2019-Aug-23_matches.csv\n",
- "data/bf\\2019-Aug-24_matches.csv\n",
- "data/bf\\2019-Aug-25_matches.csv\n",
- "data/bf\\2019-Aug-26_matches.csv\n",
- "data/bf\\2019-Aug-27_matches.csv\n",
- "data/bf\\2019-Aug-28_matches.csv\n",
- "data/bf\\2019-Aug-29_matches.csv\n",
- "data/bf\\2019-Aug-30_matches.csv\n",
- "data/bf\\2019-Aug-31_matches.csv\n",
- "data/bf\\2019-Dec-01_matches.csv\n",
- "data/bf\\2019-Dec-02_matches.csv\n",
- "data/bf\\2019-Dec-03_matches.csv\n",
- "data/bf\\2019-Dec-04_matches.csv\n",
- "data/bf\\2019-Dec-05_matches.csv\n",
- "data/bf\\2019-Dec-06_matches.csv\n",
- "data/bf\\2019-Dec-07_matches.csv\n",
- "data/bf\\2019-Dec-08_matches.csv\n",
- "data/bf\\2019-Dec-09_matches.csv\n",
- "data/bf\\2019-Dec-10_matches.csv\n",
- "data/bf\\2019-Dec-11_matches.csv\n",
- "data/bf\\2019-Dec-12_matches.csv\n",
- "data/bf\\2019-Dec-13_matches.csv\n",
- "data/bf\\2019-Dec-14_matches.csv\n",
- "data/bf\\2019-Dec-15_matches.csv\n",
- "data/bf\\2019-Dec-16_matches.csv\n",
- "data/bf\\2019-Dec-17_matches.csv\n",
- "data/bf\\2019-Dec-18_matches.csv\n",
- "data/bf\\2019-Dec-19_matches.csv\n",
- "data/bf\\2019-Dec-20_matches.csv\n",
- "data/bf\\2019-Dec-21_matches.csv\n",
- "data/bf\\2019-Dec-22_matches.csv\n",
- "data/bf\\2019-Dec-23_matches.csv\n",
- "data/bf\\2019-Dec-24_matches.csv\n",
- "data/bf\\2019-Dec-25_matches.csv\n",
- "data/bf\\2019-Dec-26_matches.csv\n",
- "data/bf\\2019-Dec-27_matches.csv\n",
- "data/bf\\2019-Dec-28_matches.csv\n",
- "data/bf\\2019-Dec-29_matches.csv\n",
- "data/bf\\2019-Dec-30_matches.csv\n",
- "data/bf\\2019-Dec-31_matches.csv\n",
- "data/bf\\2019-Feb-01_matches.csv\n",
- "data/bf\\2019-Feb-02_matches.csv\n",
- "data/bf\\2019-Feb-03_matches.csv\n",
- "data/bf\\2019-Feb-04_matches.csv\n",
- "data/bf\\2019-Feb-05_matches.csv\n",
- "data/bf\\2019-Feb-06_matches.csv\n",
- "data/bf\\2019-Feb-07_matches.csv\n",
- "data/bf\\2019-Feb-08_matches.csv\n",
- "data/bf\\2019-Feb-09_matches.csv\n",
- "data/bf\\2019-Feb-10_matches.csv\n",
- "data/bf\\2019-Feb-11_matches.csv\n",
- "data/bf\\2019-Feb-12_matches.csv\n",
- "data/bf\\2019-Feb-13_matches.csv\n",
- "data/bf\\2019-Feb-14_matches.csv\n",
- "data/bf\\2019-Feb-15_matches.csv\n",
- "data/bf\\2019-Feb-16_matches.csv\n",
- "data/bf\\2019-Feb-17_matches.csv\n",
- "data/bf\\2019-Feb-18_matches.csv\n",
- "data/bf\\2019-Feb-19_matches.csv\n",
- "data/bf\\2019-Feb-20_matches.csv\n",
- "data/bf\\2019-Feb-21_matches.csv\n",
- "data/bf\\2019-Feb-22_matches.csv\n",
- "data/bf\\2019-Feb-23_matches.csv\n",
- "data/bf\\2019-Feb-24_matches.csv\n",
- "data/bf\\2019-Feb-25_matches.csv\n",
- "data/bf\\2019-Feb-26_matches.csv\n",
- "data/bf\\2019-Feb-27_matches.csv\n",
- "data/bf\\2019-Feb-28_matches.csv\n",
- "data/bf\\2019-Jan-01_matches.csv\n",
- "data/bf\\2019-Jan-02_matches.csv\n",
- "data/bf\\2019-Jan-03_matches.csv\n",
- "data/bf\\2019-Jan-04_matches.csv\n",
- "data/bf\\2019-Jan-05_matches.csv\n",
- "data/bf\\2019-Jan-06_matches.csv\n",
- "data/bf\\2019-Jan-07_matches.csv\n",
- "data/bf\\2019-Jan-08_matches.csv\n",
- "data/bf\\2019-Jan-09_matches.csv\n",
- "data/bf\\2019-Jan-10_matches.csv\n",
- "data/bf\\2019-Jan-11_matches.csv\n",
- "data/bf\\2019-Jan-12_matches.csv\n",
- "data/bf\\2019-Jan-13_matches.csv\n",
- "data/bf\\2019-Jan-14_matches.csv\n",
- "data/bf\\2019-Jan-15_matches.csv\n",
- "data/bf\\2019-Jan-16_matches.csv\n",
- "data/bf\\2019-Jan-17_matches.csv\n",
- "data/bf\\2019-Jan-18_matches.csv\n",
- "data/bf\\2019-Jan-19_matches.csv\n",
- "data/bf\\2019-Jan-20_matches.csv\n",
- "data/bf\\2019-Jan-21_matches.csv\n",
- "data/bf\\2019-Jan-22_matches.csv\n",
- "data/bf\\2019-Jan-23_matches.csv\n",
- "data/bf\\2019-Jan-24_matches.csv\n",
- "data/bf\\2019-Jan-25_matches.csv\n",
- "data/bf\\2019-Jan-26_matches.csv\n",
- "data/bf\\2019-Jan-27_matches.csv\n",
- "data/bf\\2019-Jan-28_matches.csv\n",
- "data/bf\\2019-Jan-29_matches.csv\n",
- "data/bf\\2019-Jan-30_matches.csv\n",
- "data/bf\\2019-Jan-31_matches.csv\n",
- "data/bf\\2019-Jul-01_matches.csv\n",
- "data/bf\\2019-Jul-02_matches.csv\n",
- "data/bf\\2019-Jul-03_matches.csv\n",
- "data/bf\\2019-Jul-04_matches.csv\n",
- "data/bf\\2019-Jul-05_matches.csv\n",
- "data/bf\\2019-Jul-06_matches.csv\n",
- "data/bf\\2019-Jul-07_matches.csv\n",
- "data/bf\\2019-Jul-08_matches.csv\n",
- "data/bf\\2019-Jul-09_matches.csv\n",
- "data/bf\\2019-Jul-10_matches.csv\n",
- "data/bf\\2019-Jul-11_matches.csv\n",
- "data/bf\\2019-Jul-12_matches.csv\n",
- "data/bf\\2019-Jul-13_matches.csv\n",
- "data/bf\\2019-Jul-14_matches.csv\n",
- "data/bf\\2019-Jul-15_matches.csv\n",
- "data/bf\\2019-Jul-16_matches.csv\n",
- "data/bf\\2019-Jul-17_matches.csv\n",
- "data/bf\\2019-Jul-18_matches.csv\n",
- "data/bf\\2019-Jul-19_matches.csv\n",
- "data/bf\\2019-Jul-20_matches.csv\n",
- "data/bf\\2019-Jul-21_matches.csv\n",
- "data/bf\\2019-Jul-22_matches.csv\n",
- "data/bf\\2019-Jul-23_matches.csv\n",
- "data/bf\\2019-Jul-24_matches.csv\n",
- "data/bf\\2019-Jul-25_matches.csv\n",
- "data/bf\\2019-Jul-26_matches.csv\n",
- "data/bf\\2019-Jul-27_matches.csv\n",
- "data/bf\\2019-Jul-28_matches.csv\n",
- "data/bf\\2019-Jul-29_matches.csv\n",
- "data/bf\\2019-Jul-30_matches.csv\n",
- "data/bf\\2019-Jul-31_matches.csv\n",
- "data/bf\\2019-Jun-01_matches.csv\n",
- "data/bf\\2019-Jun-02_matches.csv\n",
- "data/bf\\2019-Jun-03_matches.csv\n",
- "data/bf\\2019-Jun-04_matches.csv\n",
- "data/bf\\2019-Jun-05_matches.csv\n",
- "data/bf\\2019-Jun-06_matches.csv\n",
- "data/bf\\2019-Jun-07_matches.csv\n",
- "data/bf\\2019-Jun-08_matches.csv\n",
- "data/bf\\2019-Jun-09_matches.csv\n",
- "data/bf\\2019-Jun-10_matches.csv\n",
- "data/bf\\2019-Jun-11_matches.csv\n",
- "data/bf\\2019-Jun-12_matches.csv\n",
- "data/bf\\2019-Jun-13_matches.csv\n",
- "data/bf\\2019-Jun-14_matches.csv\n",
- "data/bf\\2019-Jun-15_matches.csv\n",
- "data/bf\\2019-Jun-16_matches.csv\n",
- "data/bf\\2019-Jun-17_matches.csv\n",
- "data/bf\\2019-Jun-18_matches.csv\n",
- "data/bf\\2019-Jun-19_matches.csv\n",
- "data/bf\\2019-Jun-20_matches.csv\n",
- "data/bf\\2019-Jun-21_matches.csv\n",
- "data/bf\\2019-Jun-22_matches.csv\n",
- "data/bf\\2019-Jun-23_matches.csv\n",
- "data/bf\\2019-Jun-24_matches.csv\n",
- "data/bf\\2019-Jun-25_matches.csv\n",
- "data/bf\\2019-Jun-26_matches.csv\n",
- "data/bf\\2019-Jun-27_matches.csv\n",
- "data/bf\\2019-Jun-28_matches.csv\n",
- "data/bf\\2019-Jun-29_matches.csv\n",
- "data/bf\\2019-Jun-30_matches.csv\n",
- "data/bf\\2019-Mar-01_matches.csv\n",
- "data/bf\\2019-Mar-02_matches.csv\n",
- "data/bf\\2019-Mar-03_matches.csv\n",
- "data/bf\\2019-Mar-04_matches.csv\n",
- "data/bf\\2019-Mar-05_matches.csv\n",
- "data/bf\\2019-Mar-06_matches.csv\n",
- "data/bf\\2019-Mar-07_matches.csv\n",
- "data/bf\\2019-Mar-08_matches.csv\n",
- "data/bf\\2019-Mar-09_matches.csv\n",
- "data/bf\\2019-Mar-10_matches.csv\n",
- "data/bf\\2019-Mar-11_matches.csv\n",
- "data/bf\\2019-Mar-12_matches.csv\n",
- "data/bf\\2019-Mar-13_matches.csv\n",
- "data/bf\\2019-Mar-14_matches.csv\n",
- "data/bf\\2019-Mar-15_matches.csv\n",
- "data/bf\\2019-Mar-16_matches.csv\n",
- "data/bf\\2019-Mar-17_matches.csv\n",
- "data/bf\\2019-Mar-18_matches.csv\n",
- "data/bf\\2019-Mar-19_matches.csv\n",
- "data/bf\\2019-Mar-20_matches.csv\n",
- "data/bf\\2019-Mar-21_matches.csv\n",
- "data/bf\\2019-Mar-22_matches.csv\n",
- "data/bf\\2019-Mar-23_matches.csv\n",
- "data/bf\\2019-Mar-24_matches.csv\n",
- "data/bf\\2019-Mar-25_matches.csv\n",
- "data/bf\\2019-Mar-26_matches.csv\n",
- "data/bf\\2019-Mar-27_matches.csv\n",
- "data/bf\\2019-Mar-28_matches.csv\n",
- "data/bf\\2019-Mar-29_matches.csv\n",
- "data/bf\\2019-Mar-30_matches.csv\n",
- "data/bf\\2019-Mar-31_matches.csv\n",
- "data/bf\\2019-May-01_matches.csv\n",
- "data/bf\\2019-May-02_matches.csv\n",
- "data/bf\\2019-May-03_matches.csv\n",
- "data/bf\\2019-May-04_matches.csv\n",
- "data/bf\\2019-May-05_matches.csv\n",
- "data/bf\\2019-May-06_matches.csv\n",
- "data/bf\\2019-May-07_matches.csv\n",
- "data/bf\\2019-May-08_matches.csv\n",
- "data/bf\\2019-May-09_matches.csv\n",
- "data/bf\\2019-May-10_matches.csv\n",
- "data/bf\\2019-May-11_matches.csv\n",
- "data/bf\\2019-May-12_matches.csv\n",
- "data/bf\\2019-May-13_matches.csv\n",
- "data/bf\\2019-May-14_matches.csv\n",
- "data/bf\\2019-May-15_matches.csv\n",
- "data/bf\\2019-May-16_matches.csv\n",
- "data/bf\\2019-May-17_matches.csv\n",
- "data/bf\\2019-May-18_matches.csv\n",
- "data/bf\\2019-May-19_matches.csv\n",
- "data/bf\\2019-May-20_matches.csv\n",
- "data/bf\\2019-May-21_matches.csv\n",
- "data/bf\\2019-May-22_matches.csv\n",
- "data/bf\\2019-May-23_matches.csv\n",
- "data/bf\\2019-May-24_matches.csv\n",
- "data/bf\\2019-May-25_matches.csv\n",
- "data/bf\\2019-May-26_matches.csv\n",
- "data/bf\\2019-May-27_matches.csv\n",
- "data/bf\\2019-May-28_matches.csv\n",
- "data/bf\\2019-May-29_matches.csv\n",
- "data/bf\\2019-May-30_matches.csv\n",
- "data/bf\\2019-May-31_matches.csv\n",
- "data/bf\\2019-Nov-01_matches.csv\n",
- "data/bf\\2019-Nov-02_matches.csv\n",
- "data/bf\\2019-Nov-03_matches.csv\n",
- "data/bf\\2019-Nov-04_matches.csv\n",
- "data/bf\\2019-Nov-05_matches.csv\n",
- "data/bf\\2019-Nov-06_matches.csv\n",
- "data/bf\\2019-Nov-07_matches.csv\n",
- "data/bf\\2019-Nov-08_matches.csv\n",
- "data/bf\\2019-Nov-09_matches.csv\n",
- "data/bf\\2019-Nov-10_matches.csv\n",
- "data/bf\\2019-Nov-11_matches.csv\n",
- "data/bf\\2019-Nov-12_matches.csv\n",
- "data/bf\\2019-Nov-13_matches.csv\n",
- "data/bf\\2019-Nov-14_matches.csv\n",
- "data/bf\\2019-Nov-15_matches.csv\n",
- "data/bf\\2019-Nov-16_matches.csv\n",
- "data/bf\\2019-Nov-17_matches.csv\n",
- "data/bf\\2019-Nov-18_matches.csv\n",
- "data/bf\\2019-Nov-19_matches.csv\n",
- "data/bf\\2019-Nov-20_matches.csv\n",
- "data/bf\\2019-Nov-21_matches.csv\n",
- "data/bf\\2019-Nov-22_matches.csv\n",
- "data/bf\\2019-Nov-23_matches.csv\n",
- "data/bf\\2019-Nov-24_matches.csv\n",
- "data/bf\\2019-Nov-25_matches.csv\n",
- "data/bf\\2019-Nov-26_matches.csv\n",
- "data/bf\\2019-Nov-27_matches.csv\n",
- "data/bf\\2019-Nov-28_matches.csv\n",
- "data/bf\\2019-Nov-29_matches.csv\n",
- "data/bf\\2019-Nov-30_matches.csv\n",
- "data/bf\\2019-Oct-01_matches.csv\n",
- "data/bf\\2019-Oct-02_matches.csv\n",
- "data/bf\\2019-Oct-03_matches.csv\n",
- "data/bf\\2019-Oct-04_matches.csv\n",
- "data/bf\\2019-Oct-05_matches.csv\n",
- "data/bf\\2019-Oct-06_matches.csv\n",
- "data/bf\\2019-Oct-07_matches.csv\n",
- "data/bf\\2019-Oct-08_matches.csv\n",
- "data/bf\\2019-Oct-09_matches.csv\n",
- "data/bf\\2019-Oct-10_matches.csv\n",
- "data/bf\\2019-Oct-11_matches.csv\n",
- "data/bf\\2019-Oct-12_matches.csv\n",
- "data/bf\\2019-Oct-13_matches.csv\n",
- "data/bf\\2019-Oct-14_matches.csv\n",
- "data/bf\\2019-Oct-15_matches.csv\n",
- "data/bf\\2019-Oct-16_matches.csv\n",
- "data/bf\\2019-Oct-17_matches.csv\n",
- "data/bf\\2019-Oct-18_matches.csv\n",
- "data/bf\\2019-Oct-19_matches.csv\n",
- "data/bf\\2019-Oct-20_matches.csv\n",
- "data/bf\\2019-Oct-21_matches.csv\n",
- "data/bf\\2019-Oct-22_matches.csv\n",
- "data/bf\\2019-Oct-23_matches.csv\n",
- "data/bf\\2019-Oct-24_matches.csv\n",
- "data/bf\\2019-Oct-25_matches.csv\n",
- "data/bf\\2019-Oct-26_matches.csv\n",
- "data/bf\\2019-Oct-27_matches.csv\n",
- "data/bf\\2019-Oct-28_matches.csv\n",
- "data/bf\\2019-Oct-29_matches.csv\n",
- "data/bf\\2019-Oct-30_matches.csv\n",
- "data/bf\\2019-Oct-31_matches.csv\n",
- "data/bf\\2019-Sep-01_matches.csv\n",
- "data/bf\\2019-Sep-02_matches.csv\n",
- "data/bf\\2019-Sep-03_matches.csv\n",
- "data/bf\\2019-Sep-04_matches.csv\n",
- "data/bf\\2019-Sep-05_matches.csv\n",
- "data/bf\\2019-Sep-06_matches.csv\n",
- "data/bf\\2019-Sep-07_matches.csv\n",
- "data/bf\\2019-Sep-08_matches.csv\n",
- "data/bf\\2019-Sep-09_matches.csv\n",
- "data/bf\\2019-Sep-10_matches.csv\n",
- "data/bf\\2019-Sep-11_matches.csv\n",
- "data/bf\\2019-Sep-12_matches.csv\n",
- "data/bf\\2019-Sep-13_matches.csv\n",
- "data/bf\\2019-Sep-14_matches.csv\n",
- "data/bf\\2019-Sep-15_matches.csv\n",
- "data/bf\\2019-Sep-16_matches.csv\n",
- "data/bf\\2019-Sep-17_matches.csv\n",
- "data/bf\\2019-Sep-18_matches.csv\n",
- "data/bf\\2019-Sep-19_matches.csv\n",
- "data/bf\\2019-Sep-20_matches.csv\n",
- "data/bf\\2019-Sep-21_matches.csv\n",
- "data/bf\\2019-Sep-22_matches.csv\n",
- "data/bf\\2019-Sep-23_matches.csv\n",
- "data/bf\\2019-Sep-24_matches.csv\n",
- "data/bf\\2019-Sep-25_matches.csv\n",
- "data/bf\\2019-Sep-26_matches.csv\n",
- "data/bf\\2019-Sep-27_matches.csv\n",
- "data/bf\\2019-Sep-28_matches.csv\n",
- "data/bf\\2019-Sep-29_matches.csv\n",
- "data/bf\\2019-Sep-30_matches.csv\n",
- "data/bf\\2020-Aug-01_matches.csv\n",
- "data/bf\\2020-Aug-02_matches.csv\n",
- "data/bf\\2020-Aug-03_matches.csv\n",
- "data/bf\\2020-Aug-04_matches.csv\n",
- "data/bf\\2020-Aug-05_matches.csv\n",
- "data/bf\\2020-Aug-06_matches.csv\n",
- "data/bf\\2020-Aug-07_matches.csv\n",
- "data/bf\\2020-Aug-08_matches.csv\n",
- "data/bf\\2020-Aug-09_matches.csv\n",
- "data/bf\\2020-Aug-10_matches.csv\n",
- "data/bf\\2020-Aug-11_matches.csv\n",
- "data/bf\\2020-Aug-12_matches.csv\n",
- "data/bf\\2020-Aug-13_matches.csv\n",
- "data/bf\\2020-Aug-14_matches.csv\n",
- "data/bf\\2020-Aug-15_matches.csv\n",
- "data/bf\\2020-Aug-16_matches.csv\n",
- "data/bf\\2020-Aug-17_matches.csv\n",
- "data/bf\\2020-Aug-18_matches.csv\n",
- "data/bf\\2020-Aug-19_matches.csv\n",
- "data/bf\\2020-Aug-20_matches.csv\n",
- "data/bf\\2020-Aug-21_matches.csv\n",
- "data/bf\\2020-Aug-22_matches.csv\n",
- "data/bf\\2020-Aug-23_matches.csv\n",
- "data/bf\\2020-Aug-24_matches.csv\n",
- "data/bf\\2020-Aug-25_matches.csv\n",
- "data/bf\\2020-Aug-26_matches.csv\n",
- "data/bf\\2020-Aug-27_matches.csv\n",
- "data/bf\\2020-Aug-28_matches.csv\n",
- "data/bf\\2020-Aug-29_matches.csv\n",
- "data/bf\\2020-Aug-30_matches.csv\n",
- "data/bf\\2020-Aug-31_matches.csv\n",
- "data/bf\\2020-Feb-01_matches.csv\n",
- "data/bf\\2020-Feb-02_matches.csv\n",
- "data/bf\\2020-Feb-03_matches.csv\n",
- "data/bf\\2020-Feb-04_matches.csv\n",
- "data/bf\\2020-Feb-05_matches.csv\n",
- "data/bf\\2020-Feb-06_matches.csv\n",
- "data/bf\\2020-Feb-07_matches.csv\n",
- "data/bf\\2020-Feb-08_matches.csv\n",
- "data/bf\\2020-Feb-09_matches.csv\n",
- "data/bf\\2020-Feb-10_matches.csv\n",
- "data/bf\\2020-Feb-11_matches.csv\n",
- "data/bf\\2020-Feb-12_matches.csv\n",
- "data/bf\\2020-Feb-13_matches.csv\n",
- "data/bf\\2020-Feb-14_matches.csv\n",
- "data/bf\\2020-Feb-15_matches.csv\n",
- "data/bf\\2020-Feb-16_matches.csv\n",
- "data/bf\\2020-Feb-17_matches.csv\n",
- "data/bf\\2020-Feb-18_matches.csv\n",
- "data/bf\\2020-Feb-19_matches.csv\n",
- "data/bf\\2020-Feb-20_matches.csv\n",
- "data/bf\\2020-Feb-21_matches.csv\n",
- "data/bf\\2020-Feb-22_matches.csv\n",
- "data/bf\\2020-Feb-23_matches.csv\n",
- "data/bf\\2020-Feb-24_matches.csv\n",
- "data/bf\\2020-Feb-25_matches.csv\n",
- "data/bf\\2020-Feb-26_matches.csv\n",
- "data/bf\\2020-Feb-27_matches.csv\n",
- "data/bf\\2020-Feb-28_matches.csv\n",
- "data/bf\\2020-Feb-29_matches.csv\n",
- "data/bf\\2020-Jan-01_matches.csv\n",
- "data/bf\\2020-Jan-02_matches.csv\n",
- "data/bf\\2020-Jan-03_matches.csv\n",
- "data/bf\\2020-Jan-04_matches.csv\n",
- "data/bf\\2020-Jan-05_matches.csv\n",
- "data/bf\\2020-Jan-06_matches.csv\n",
- "data/bf\\2020-Jan-07_matches.csv\n",
- "data/bf\\2020-Jan-08_matches.csv\n",
- "data/bf\\2020-Jan-09_matches.csv\n",
- "data/bf\\2020-Jan-10_matches.csv\n",
- "data/bf\\2020-Jan-11_matches.csv\n",
- "data/bf\\2020-Jan-12_matches.csv\n",
- "data/bf\\2020-Jan-13_matches.csv\n",
- "data/bf\\2020-Jan-14_matches.csv\n",
- "data/bf\\2020-Jan-15_matches.csv\n",
- "data/bf\\2020-Jan-16_matches.csv\n",
- "data/bf\\2020-Jan-17_matches.csv\n",
- "data/bf\\2020-Jan-18_matches.csv\n",
- "data/bf\\2020-Jan-19_matches.csv\n",
- "data/bf\\2020-Jan-20_matches.csv\n",
- "data/bf\\2020-Jan-21_matches.csv\n",
- "data/bf\\2020-Jan-22_matches.csv\n",
- "data/bf\\2020-Jan-23_matches.csv\n",
- "data/bf\\2020-Jan-24_matches.csv\n",
- "data/bf\\2020-Jan-25_matches.csv\n",
- "data/bf\\2020-Jan-26_matches.csv\n",
- "data/bf\\2020-Jan-27_matches.csv\n",
- "data/bf\\2020-Jan-28_matches.csv\n",
- "data/bf\\2020-Jan-29_matches.csv\n",
- "data/bf\\2020-Jan-30_matches.csv\n",
- "data/bf\\2020-Jan-31_matches.csv\n",
- "data/bf\\2020-Jul-01_matches.csv\n",
- "data/bf\\2020-Jul-02_matches.csv\n",
- "data/bf\\2020-Jul-03_matches.csv\n",
- "data/bf\\2020-Jul-04_matches.csv\n",
- "data/bf\\2020-Jul-05_matches.csv\n",
- "data/bf\\2020-Jul-06_matches.csv\n",
- "data/bf\\2020-Jul-07_matches.csv\n",
- "data/bf\\2020-Jul-08_matches.csv\n",
- "data/bf\\2020-Jul-09_matches.csv\n",
- "data/bf\\2020-Jul-10_matches.csv\n",
- "data/bf\\2020-Jul-11_matches.csv\n",
- "data/bf\\2020-Jul-12_matches.csv\n",
- "data/bf\\2020-Jul-13_matches.csv\n",
- "data/bf\\2020-Jul-14_matches.csv\n",
- "data/bf\\2020-Jul-15_matches.csv\n",
- "data/bf\\2020-Jul-16_matches.csv\n",
- "data/bf\\2020-Jul-17_matches.csv\n",
- "data/bf\\2020-Jul-18_matches.csv\n",
- "data/bf\\2020-Jul-19_matches.csv\n",
- "data/bf\\2020-Jul-20_matches.csv\n",
- "data/bf\\2020-Jul-21_matches.csv\n",
- "data/bf\\2020-Jul-22_matches.csv\n",
- "data/bf\\2020-Jul-23_matches.csv\n",
- "data/bf\\2020-Jul-24_matches.csv\n",
- "data/bf\\2020-Jul-25_matches.csv\n",
- "data/bf\\2020-Jul-26_matches.csv\n",
- "data/bf\\2020-Jul-27_matches.csv\n",
- "data/bf\\2020-Jul-28_matches.csv\n",
- "data/bf\\2020-Jul-29_matches.csv\n",
- "data/bf\\2020-Jul-30_matches.csv\n",
- "data/bf\\2020-Jul-31_matches.csv\n",
- "data/bf\\2020-Jun-21_matches.csv\n",
- "data/bf\\2020-Mar-01_matches.csv\n",
- "data/bf\\2020-Mar-02_matches.csv\n",
- "data/bf\\2020-Mar-03_matches.csv\n",
- "data/bf\\2020-Mar-04_matches.csv\n",
- "data/bf\\2020-Mar-05_matches.csv\n",
- "data/bf\\2020-Mar-06_matches.csv\n",
- "data/bf\\2020-Mar-07_matches.csv\n",
- "data/bf\\2020-Mar-08_matches.csv\n",
- "data/bf\\2020-Mar-09_matches.csv\n",
- "data/bf\\2020-Mar-10_matches.csv\n",
- "data/bf\\2020-Mar-11_matches.csv\n",
- "data/bf\\2020-Mar-12_matches.csv\n",
- "data/bf\\2020-Mar-13_matches.csv\n",
- "data/bf\\2020-Mar-14_matches.csv\n",
- "data/bf\\2020-Mar-15_matches.csv\n",
- "data/bf\\2020-Mar-16_matches.csv\n",
- "data/bf\\2020-Mar-17_matches.csv\n",
- "data/bf\\2020-Mar-18_matches.csv\n",
- "data/bf\\2020-Mar-19_matches.csv\n",
- "data/bf\\2020-Mar-20_matches.csv\n",
- "data/bf\\2020-Mar-21_matches.csv\n",
- "data/bf\\2020-Mar-22_matches.csv\n",
- "data/bf\\2020-Mar-23_matches.csv\n",
- "data/bf\\2020-Mar-24_matches.csv\n",
- "data/bf\\2020-Mar-25_matches.csv\n",
- "data/bf\\2020-Mar-26_matches.csv\n",
- "data/bf\\2020-Mar-27_matches.csv\n",
- "data/bf\\2020-Mar-28_matches.csv\n",
- "data/bf\\2020-Mar-29_matches.csv\n",
- "data/bf\\2020-Mar-30_matches.csv\n",
- "data/bf\\2020-Mar-31_matches.csv\n",
- "data/bf\\2020-Oct-01_matches.csv\n",
- "data/bf\\2020-Oct-02_matches.csv\n",
- "data/bf\\2020-Oct-03_matches.csv\n",
- "data/bf\\2020-Oct-04_matches.csv\n",
- "data/bf\\2020-Oct-05_matches.csv\n",
- "data/bf\\2020-Oct-06_matches.csv\n",
- "data/bf\\2020-Oct-07_matches.csv\n",
- "data/bf\\2020-Oct-08_matches.csv\n",
- "data/bf\\2020-Oct-09_matches.csv\n",
- "data/bf\\2020-Oct-10_matches.csv\n",
- "data/bf\\2020-Oct-11_matches.csv\n",
- "data/bf\\2020-Oct-12_matches.csv\n",
- "data/bf\\2020-Oct-13_matches.csv\n",
- "data/bf\\2020-Oct-14_matches.csv\n",
- "data/bf\\2020-Oct-15_matches.csv\n",
- "data/bf\\2020-Oct-16_matches.csv\n",
- "data/bf\\2020-Oct-17_matches.csv\n",
- "data/bf\\2020-Oct-18_matches.csv\n",
- "data/bf\\2020-Oct-19_matches.csv\n",
- "data/bf\\2020-Oct-20_matches.csv\n",
- "data/bf\\2020-Oct-21_matches.csv\n",
- "data/bf\\2020-Oct-22_matches.csv\n",
- "data/bf\\2020-Oct-23_matches.csv\n",
- "data/bf\\2020-Oct-24_matches.csv\n",
- "data/bf\\2020-Oct-25_matches.csv\n",
- "data/bf\\2020-Oct-26_matches.csv\n",
- "data/bf\\2020-Oct-27_matches.csv\n",
- "data/bf\\2020-Oct-28_matches.csv\n",
- "data/bf\\2020-Oct-29_matches.csv\n",
- "data/bf\\2020-Oct-30_matches.csv\n",
- "data/bf\\2020-Oct-31_matches.csv\n",
- "data/bf\\2020-Sep-01_matches.csv\n",
- "data/bf\\2020-Sep-02_matches.csv\n",
- "data/bf\\2020-Sep-03_matches.csv\n",
- "data/bf\\2020-Sep-04_matches.csv\n",
- "data/bf\\2020-Sep-05_matches.csv\n",
- "data/bf\\2020-Sep-06_matches.csv\n",
- "data/bf\\2020-Sep-07_matches.csv\n",
- "data/bf\\2020-Sep-08_matches.csv\n",
- "data/bf\\2020-Sep-09_matches.csv\n",
- "data/bf\\2020-Sep-10_matches.csv\n",
- "data/bf\\2020-Sep-11_matches.csv\n",
- "data/bf\\2020-Sep-12_matches.csv\n",
- "data/bf\\2020-Sep-13_matches.csv\n",
- "data/bf\\2020-Sep-14_matches.csv\n",
- "data/bf\\2020-Sep-15_matches.csv\n",
- "data/bf\\2020-Sep-16_matches.csv\n",
- "data/bf\\2020-Sep-17_matches.csv\n",
- "data/bf\\2020-Sep-18_matches.csv\n",
- "data/bf\\2020-Sep-19_matches.csv\n",
- "data/bf\\2020-Sep-20_matches.csv\n",
- "data/bf\\2020-Sep-21_matches.csv\n",
- "data/bf\\2020-Sep-22_matches.csv\n",
- "data/bf\\2020-Sep-23_matches.csv\n",
- "data/bf\\2020-Sep-24_matches.csv\n",
- "data/bf\\2020-Sep-25_matches.csv\n",
- "data/bf\\2020-Sep-26_matches.csv\n",
- "data/bf\\2020-Sep-27_matches.csv\n",
- "data/bf\\2020-Sep-28_matches.csv\n",
- "data/bf\\2020-Sep-29_matches.csv\n",
- "data/bf\\2020-Sep-30_matches.csv\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"dfs=[]\n",
"for y in range(2015,2021):\n",
@@ -1601,7 +1222,7 @@
},
{
"cell_type": "code",
- "execution_count": 177,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -1610,609 +1231,13 @@
},
{
"cell_type": "code",
- "execution_count": 204,
+ "execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "dds.csv\n",
- "data/bf/days\\2018-Sep-23_odds.csv\n",
- "data/bf/days\\2018-Sep-24_odds.csv\n",
- "data/bf/days\\2018-Sep-25_odds.csv\n",
- "data/bf/days\\2018-Sep-26_odds.csv\n",
- "data/bf/days\\2018-Sep-27_odds.csv\n",
- "data/bf/days\\2018-Sep-28_odds.csv\n",
- "data/bf/days\\2018-Sep-29_odds.csv\n",
- "data/bf/days\\2018-Sep-30_odds.csv\n",
- "data/bf/days\\2019-Apr-01_odds.csv\n",
- "data/bf/days\\2019-Apr-02_odds.csv\n",
- "data/bf/days\\2019-Apr-03_odds.csv\n",
- "data/bf/days\\2019-Apr-04_odds.csv\n",
- "data/bf/days\\2019-Apr-05_odds.csv\n",
- "data/bf/days\\2019-Apr-06_odds.csv\n",
- "data/bf/days\\2019-Apr-07_odds.csv\n",
- "data/bf/days\\2019-Apr-08_odds.csv\n",
- "data/bf/days\\2019-Apr-09_odds.csv\n",
- "data/bf/days\\2019-Apr-10_odds.csv\n",
- "data/bf/days\\2019-Apr-11_odds.csv\n",
- "data/bf/days\\2019-Apr-12_odds.csv\n",
- "data/bf/days\\2019-Apr-13_odds.csv\n",
- "data/bf/days\\2019-Apr-14_odds.csv\n",
- "data/bf/days\\2019-Apr-15_odds.csv\n",
- "data/bf/days\\2019-Apr-16_odds.csv\n",
- "data/bf/days\\2019-Apr-17_odds.csv\n",
- "data/bf/days\\2019-Apr-18_odds.csv\n",
- "data/bf/days\\2019-Apr-19_odds.csv\n",
- "data/bf/days\\2019-Apr-20_odds.csv\n",
- "data/bf/days\\2019-Apr-21_odds.csv\n",
- "data/bf/days\\2019-Apr-22_odds.csv\n",
- "data/bf/days\\2019-Apr-23_odds.csv\n",
- "data/bf/days\\2019-Apr-24_odds.csv\n",
- "data/bf/days\\2019-Apr-25_odds.csv\n",
- "data/bf/days\\2019-Apr-26_odds.csv\n",
- "data/bf/days\\2019-Apr-27_odds.csv\n",
- "data/bf/days\\2019-Apr-28_odds.csv\n",
- "data/bf/days\\2019-Apr-29_odds.csv\n",
- "data/bf/days\\2019-Apr-30_odds.csv\n",
- "data/bf/days\\2019-Aug-01_odds.csv\n",
- "data/bf/days\\2019-Aug-02_odds.csv\n",
- "data/bf/days\\2019-Aug-03_odds.csv\n",
- "data/bf/days\\2019-Aug-04_odds.csv\n",
- "data/bf/days\\2019-Aug-05_odds.csv\n",
- "data/bf/days\\2019-Aug-06_odds.csv\n",
- "data/bf/days\\2019-Aug-07_odds.csv\n",
- "data/bf/days\\2019-Aug-08_odds.csv\n",
- "data/bf/days\\2019-Aug-09_odds.csv\n",
- "data/bf/days\\2019-Aug-10_odds.csv\n",
- "data/bf/days\\2019-Aug-11_odds.csv\n",
- "data/bf/days\\2019-Aug-12_odds.csv\n",
- "data/bf/days\\2019-Aug-13_odds.csv\n",
- "data/bf/days\\2019-Aug-14_odds.csv\n",
- "data/bf/days\\2019-Aug-15_odds.csv\n",
- "data/bf/days\\2019-Aug-16_odds.csv\n",
- "data/bf/days\\2019-Aug-17_odds.csv\n",
- "data/bf/days\\2019-Aug-18_odds.csv\n",
- "data/bf/days\\2019-Aug-19_odds.csv\n",
- "data/bf/days\\2019-Aug-20_odds.csv\n",
- "data/bf/days\\2019-Aug-21_odds.csv\n",
- "data/bf/days\\2019-Aug-22_odds.csv\n",
- "data/bf/days\\2019-Aug-23_odds.csv\n",
- "data/bf/days\\2019-Aug-24_odds.csv\n",
- "data/bf/days\\2019-Aug-25_odds.csv\n",
- "data/bf/days\\2019-Aug-26_odds.csv\n",
- "data/bf/days\\2019-Aug-27_odds.csv\n",
- "data/bf/days\\2019-Aug-28_odds.csv\n",
- "data/bf/days\\2019-Aug-29_odds.csv\n",
- "data/bf/days\\2019-Aug-30_odds.csv\n",
- "data/bf/days\\2019-Aug-31_odds.csv\n",
- "data/bf/days\\2019-Dec-01_odds.csv\n",
- "data/bf/days\\2019-Dec-02_odds.csv\n",
- "data/bf/days\\2019-Dec-03_odds.csv\n",
- "data/bf/days\\2019-Dec-04_odds.csv\n",
- "data/bf/days\\2019-Dec-05_odds.csv\n",
- "data/bf/days\\2019-Dec-06_odds.csv\n",
- "data/bf/days\\2019-Dec-07_odds.csv\n",
- "data/bf/days\\2019-Dec-08_odds.csv\n",
- "data/bf/days\\2019-Dec-09_odds.csv\n",
- "data/bf/days\\2019-Dec-10_odds.csv\n",
- "data/bf/days\\2019-Dec-11_odds.csv\n",
- "data/bf/days\\2019-Dec-12_odds.csv\n",
- "data/bf/days\\2019-Dec-13_odds.csv\n",
- "data/bf/days\\2019-Dec-14_odds.csv\n",
- "data/bf/days\\2019-Dec-15_odds.csv\n",
- "data/bf/days\\2019-Dec-16_odds.csv\n",
- "data/bf/days\\2019-Dec-17_odds.csv\n",
- "data/bf/days\\2019-Dec-18_odds.csv\n",
- "data/bf/days\\2019-Dec-19_odds.csv\n",
- "data/bf/days\\2019-Dec-20_odds.csv\n",
- "data/bf/days\\2019-Dec-21_odds.csv\n",
- "data/bf/days\\2019-Dec-22_odds.csv\n",
- "data/bf/days\\2019-Dec-23_odds.csv\n",
- "data/bf/days\\2019-Dec-24_odds.csv\n",
- "data/bf/days\\2019-Dec-25_odds.csv\n",
- "data/bf/days\\2019-Dec-26_odds.csv\n",
- "data/bf/days\\2019-Dec-27_odds.csv\n",
- "data/bf/days\\2019-Dec-28_odds.csv\n",
- "data/bf/days\\2019-Dec-29_odds.csv\n",
- "data/bf/days\\2019-Dec-30_odds.csv\n",
- "data/bf/days\\2019-Dec-31_odds.csv\n",
- "data/bf/days\\2019-Feb-01_odds.csv\n",
- "data/bf/days\\2019-Feb-02_odds.csv\n",
- "data/bf/days\\2019-Feb-03_odds.csv\n",
- "data/bf/days\\2019-Feb-04_odds.csv\n",
- "data/bf/days\\2019-Feb-05_odds.csv\n",
- "data/bf/days\\2019-Feb-06_odds.csv\n",
- "data/bf/days\\2019-Feb-07_odds.csv\n",
- "data/bf/days\\2019-Feb-08_odds.csv\n",
- "data/bf/days\\2019-Feb-09_odds.csv\n",
- "data/bf/days\\2019-Feb-10_odds.csv\n",
- "data/bf/days\\2019-Feb-11_odds.csv\n",
- "data/bf/days\\2019-Feb-12_odds.csv\n",
- "data/bf/days\\2019-Feb-13_odds.csv\n",
- "data/bf/days\\2019-Feb-14_odds.csv\n",
- "data/bf/days\\2019-Feb-15_odds.csv\n",
- "data/bf/days\\2019-Feb-16_odds.csv\n",
- "data/bf/days\\2019-Feb-17_odds.csv\n",
- "data/bf/days\\2019-Feb-18_odds.csv\n",
- "data/bf/days\\2019-Feb-19_odds.csv\n",
- "data/bf/days\\2019-Feb-20_odds.csv\n",
- "data/bf/days\\2019-Feb-21_odds.csv\n",
- "data/bf/days\\2019-Feb-22_odds.csv\n",
- "data/bf/days\\2019-Feb-23_odds.csv\n",
- "data/bf/days\\2019-Feb-24_odds.csv\n",
- "data/bf/days\\2019-Feb-25_odds.csv\n",
- "data/bf/days\\2019-Feb-26_odds.csv\n",
- "data/bf/days\\2019-Feb-27_odds.csv\n",
- "data/bf/days\\2019-Feb-28_odds.csv\n",
- "data/bf/days\\2019-Jan-01_odds.csv\n",
- "data/bf/days\\2019-Jan-02_odds.csv\n",
- "data/bf/days\\2019-Jan-03_odds.csv\n",
- "data/bf/days\\2019-Jan-04_odds.csv\n",
- "data/bf/days\\2019-Jan-05_odds.csv\n",
- "data/bf/days\\2019-Jan-06_odds.csv\n",
- "data/bf/days\\2019-Jan-07_odds.csv\n",
- "data/bf/days\\2019-Jan-08_odds.csv\n",
- "data/bf/days\\2019-Jan-09_odds.csv\n",
- "data/bf/days\\2019-Jan-10_odds.csv\n",
- "data/bf/days\\2019-Jan-11_odds.csv\n",
- "data/bf/days\\2019-Jan-12_odds.csv\n",
- "data/bf/days\\2019-Jan-13_odds.csv\n",
- "data/bf/days\\2019-Jan-14_odds.csv\n",
- "data/bf/days\\2019-Jan-15_odds.csv\n",
- "data/bf/days\\2019-Jan-16_odds.csv\n",
- "data/bf/days\\2019-Jan-17_odds.csv\n",
- "data/bf/days\\2019-Jan-18_odds.csv\n",
- "data/bf/days\\2019-Jan-19_odds.csv\n",
- "data/bf/days\\2019-Jan-20_odds.csv\n",
- "data/bf/days\\2019-Jan-21_odds.csv\n",
- "data/bf/days\\2019-Jan-22_odds.csv\n",
- "data/bf/days\\2019-Jan-23_odds.csv\n",
- "data/bf/days\\2019-Jan-24_odds.csv\n",
- "data/bf/days\\2019-Jan-25_odds.csv\n",
- "data/bf/days\\2019-Jan-26_odds.csv\n",
- "data/bf/days\\2019-Jan-27_odds.csv\n",
- "data/bf/days\\2019-Jan-28_odds.csv\n",
- "data/bf/days\\2019-Jan-29_odds.csv\n",
- "data/bf/days\\2019-Jan-30_odds.csv\n",
- "data/bf/days\\2019-Jan-31_odds.csv\n",
- "data/bf/days\\2019-Jul-01_odds.csv\n",
- "data/bf/days\\2019-Jul-02_odds.csv\n",
- "data/bf/days\\2019-Jul-03_odds.csv\n",
- "data/bf/days\\2019-Jul-04_odds.csv\n",
- "data/bf/days\\2019-Jul-05_odds.csv\n",
- "data/bf/days\\2019-Jul-06_odds.csv\n",
- "data/bf/days\\2019-Jul-07_odds.csv\n",
- "data/bf/days\\2019-Jul-08_odds.csv\n",
- "data/bf/days\\2019-Jul-09_odds.csv\n",
- "data/bf/days\\2019-Jul-10_odds.csv\n",
- "data/bf/days\\2019-Jul-11_odds.csv\n",
- "data/bf/days\\2019-Jul-12_odds.csv\n",
- "data/bf/days\\2019-Jul-13_odds.csv\n",
- "data/bf/days\\2019-Jul-14_odds.csv\n",
- "data/bf/days\\2019-Jul-15_odds.csv\n",
- "data/bf/days\\2019-Jul-16_odds.csv\n",
- "data/bf/days\\2019-Jul-17_odds.csv\n",
- "data/bf/days\\2019-Jul-18_odds.csv\n",
- "data/bf/days\\2019-Jul-19_odds.csv\n",
- "data/bf/days\\2019-Jul-20_odds.csv\n",
- "data/bf/days\\2019-Jul-21_odds.csv\n",
- "data/bf/days\\2019-Jul-22_odds.csv\n",
- "data/bf/days\\2019-Jul-23_odds.csv\n",
- "data/bf/days\\2019-Jul-24_odds.csv\n",
- "data/bf/days\\2019-Jul-25_odds.csv\n",
- "data/bf/days\\2019-Jul-26_odds.csv\n",
- "data/bf/days\\2019-Jul-27_odds.csv\n",
- "data/bf/days\\2019-Jul-28_odds.csv\n",
- "data/bf/days\\2019-Jul-29_odds.csv\n",
- "data/bf/days\\2019-Jul-30_odds.csv\n",
- "data/bf/days\\2019-Jul-31_odds.csv\n",
- "data/bf/days\\2019-Jun-01_odds.csv\n",
- "data/bf/days\\2019-Jun-02_odds.csv\n",
- "data/bf/days\\2019-Jun-03_odds.csv\n",
- "data/bf/days\\2019-Jun-04_odds.csv\n",
- "data/bf/days\\2019-Jun-05_odds.csv\n",
- "data/bf/days\\2019-Jun-06_odds.csv\n",
- "data/bf/days\\2019-Jun-07_odds.csv\n",
- "data/bf/days\\2019-Jun-08_odds.csv\n",
- "data/bf/days\\2019-Jun-09_odds.csv\n",
- "data/bf/days\\2019-Jun-10_odds.csv\n",
- "data/bf/days\\2019-Jun-11_odds.csv\n",
- "data/bf/days\\2019-Jun-12_odds.csv\n",
- "data/bf/days\\2019-Jun-13_odds.csv\n",
- "data/bf/days\\2019-Jun-14_odds.csv\n",
- "data/bf/days\\2019-Jun-15_odds.csv\n",
- "data/bf/days\\2019-Jun-16_odds.csv\n",
- "data/bf/days\\2019-Jun-17_odds.csv\n",
- "data/bf/days\\2019-Jun-18_odds.csv\n",
- "data/bf/days\\2019-Jun-19_odds.csv\n",
- "data/bf/days\\2019-Jun-20_odds.csv\n",
- "data/bf/days\\2019-Jun-21_odds.csv\n",
- "data/bf/days\\2019-Jun-22_odds.csv\n",
- "data/bf/days\\2019-Jun-23_odds.csv\n",
- "data/bf/days\\2019-Jun-24_odds.csv\n",
- "data/bf/days\\2019-Jun-25_odds.csv\n",
- "data/bf/days\\2019-Jun-26_odds.csv\n",
- "data/bf/days\\2019-Jun-27_odds.csv\n",
- "data/bf/days\\2019-Jun-28_odds.csv\n",
- "data/bf/days\\2019-Jun-29_odds.csv\n",
- "data/bf/days\\2019-Jun-30_odds.csv\n",
- "data/bf/days\\2019-Mar-01_odds.csv\n",
- "data/bf/days\\2019-Mar-02_odds.csv\n",
- "data/bf/days\\2019-Mar-03_odds.csv\n",
- "data/bf/days\\2019-Mar-04_odds.csv\n",
- "data/bf/days\\2019-Mar-05_odds.csv\n",
- "data/bf/days\\2019-Mar-06_odds.csv\n",
- "data/bf/days\\2019-Mar-07_odds.csv\n",
- "data/bf/days\\2019-Mar-08_odds.csv\n",
- "data/bf/days\\2019-Mar-09_odds.csv\n",
- "data/bf/days\\2019-Mar-10_odds.csv\n",
- "data/bf/days\\2019-Mar-11_odds.csv\n",
- "data/bf/days\\2019-Mar-12_odds.csv\n",
- "data/bf/days\\2019-Mar-13_odds.csv\n",
- "data/bf/days\\2019-Mar-14_odds.csv\n",
- "data/bf/days\\2019-Mar-15_odds.csv\n",
- "data/bf/days\\2019-Mar-16_odds.csv\n",
- "data/bf/days\\2019-Mar-17_odds.csv\n",
- "data/bf/days\\2019-Mar-18_odds.csv\n",
- "data/bf/days\\2019-Mar-19_odds.csv\n",
- "data/bf/days\\2019-Mar-20_odds.csv\n",
- "data/bf/days\\2019-Mar-21_odds.csv\n",
- "data/bf/days\\2019-Mar-22_odds.csv\n",
- "data/bf/days\\2019-Mar-23_odds.csv\n",
- "data/bf/days\\2019-Mar-24_odds.csv\n",
- "data/bf/days\\2019-Mar-25_odds.csv\n",
- "data/bf/days\\2019-Mar-26_odds.csv\n",
- "data/bf/days\\2019-Mar-27_odds.csv\n",
- "data/bf/days\\2019-Mar-28_odds.csv\n",
- "data/bf/days\\2019-Mar-29_odds.csv\n",
- "data/bf/days\\2019-Mar-30_odds.csv\n",
- "data/bf/days\\2019-Mar-31_odds.csv\n",
- "data/bf/days\\2019-May-01_odds.csv\n",
- "data/bf/days\\2019-May-02_odds.csv\n",
- "data/bf/days\\2019-May-03_odds.csv\n",
- "data/bf/days\\2019-May-04_odds.csv\n",
- "data/bf/days\\2019-May-05_odds.csv\n",
- "data/bf/days\\2019-May-06_odds.csv\n",
- "data/bf/days\\2019-May-07_odds.csv\n",
- "data/bf/days\\2019-May-08_odds.csv\n",
- "data/bf/days\\2019-May-09_odds.csv\n",
- "data/bf/days\\2019-May-10_odds.csv\n",
- "data/bf/days\\2019-May-11_odds.csv\n",
- "data/bf/days\\2019-May-12_odds.csv\n",
- "data/bf/days\\2019-May-13_odds.csv\n",
- "data/bf/days\\2019-May-14_odds.csv\n",
- "data/bf/days\\2019-May-15_odds.csv\n",
- "data/bf/days\\2019-May-16_odds.csv\n",
- "data/bf/days\\2019-May-17_odds.csv\n",
- "data/bf/days\\2019-May-18_odds.csv\n",
- "data/bf/days\\2019-May-19_odds.csv\n",
- "data/bf/days\\2019-May-20_odds.csv\n",
- "data/bf/days\\2019-May-21_odds.csv\n",
- "data/bf/days\\2019-May-22_odds.csv\n",
- "data/bf/days\\2019-May-23_odds.csv\n",
- "data/bf/days\\2019-May-24_odds.csv\n",
- "data/bf/days\\2019-May-25_odds.csv\n",
- "data/bf/days\\2019-May-26_odds.csv\n",
- "data/bf/days\\2019-May-27_odds.csv\n",
- "data/bf/days\\2019-May-28_odds.csv\n",
- "data/bf/days\\2019-May-29_odds.csv\n",
- "data/bf/days\\2019-May-30_odds.csv\n",
- "data/bf/days\\2019-May-31_odds.csv\n",
- "data/bf/days\\2019-Nov-01_odds.csv\n",
- "data/bf/days\\2019-Nov-02_odds.csv\n",
- "data/bf/days\\2019-Nov-03_odds.csv\n",
- "data/bf/days\\2019-Nov-04_odds.csv\n",
- "data/bf/days\\2019-Nov-05_odds.csv\n",
- "data/bf/days\\2019-Nov-06_odds.csv\n",
- "data/bf/days\\2019-Nov-07_odds.csv\n",
- "data/bf/days\\2019-Nov-08_odds.csv\n",
- "data/bf/days\\2019-Nov-09_odds.csv\n",
- "data/bf/days\\2019-Nov-10_odds.csv\n",
- "data/bf/days\\2019-Nov-11_odds.csv\n",
- "data/bf/days\\2019-Nov-12_odds.csv\n",
- "data/bf/days\\2019-Nov-13_odds.csv\n",
- "data/bf/days\\2019-Nov-14_odds.csv\n",
- "data/bf/days\\2019-Nov-15_odds.csv\n",
- "data/bf/days\\2019-Nov-16_odds.csv\n",
- "data/bf/days\\2019-Nov-17_odds.csv\n",
- "data/bf/days\\2019-Nov-18_odds.csv\n",
- "data/bf/days\\2019-Nov-19_odds.csv\n",
- "data/bf/days\\2019-Nov-20_odds.csv\n",
- "data/bf/days\\2019-Nov-21_odds.csv\n",
- "data/bf/days\\2019-Nov-22_odds.csv\n",
- "data/bf/days\\2019-Nov-23_odds.csv\n",
- "data/bf/days\\2019-Nov-24_odds.csv\n",
- "data/bf/days\\2019-Nov-25_odds.csv\n",
- "data/bf/days\\2019-Nov-26_odds.csv\n",
- "data/bf/days\\2019-Nov-27_odds.csv\n",
- "data/bf/days\\2019-Nov-28_odds.csv\n",
- "data/bf/days\\2019-Nov-29_odds.csv\n",
- "data/bf/days\\2019-Nov-30_odds.csv\n",
- "data/bf/days\\2019-Oct-01_odds.csv\n",
- "data/bf/days\\2019-Oct-02_odds.csv\n",
- "data/bf/days\\2019-Oct-03_odds.csv\n",
- "data/bf/days\\2019-Oct-04_odds.csv\n",
- "data/bf/days\\2019-Oct-05_odds.csv\n",
- "data/bf/days\\2019-Oct-06_odds.csv\n",
- "data/bf/days\\2019-Oct-07_odds.csv\n",
- "data/bf/days\\2019-Oct-08_odds.csv\n",
- "data/bf/days\\2019-Oct-09_odds.csv\n",
- "data/bf/days\\2019-Oct-10_odds.csv\n",
- "data/bf/days\\2019-Oct-11_odds.csv\n",
- "data/bf/days\\2019-Oct-12_odds.csv\n",
- "data/bf/days\\2019-Oct-13_odds.csv\n",
- "data/bf/days\\2019-Oct-14_odds.csv\n",
- "data/bf/days\\2019-Oct-15_odds.csv\n",
- "data/bf/days\\2019-Oct-16_odds.csv\n",
- "data/bf/days\\2019-Oct-17_odds.csv\n",
- "data/bf/days\\2019-Oct-18_odds.csv\n",
- "data/bf/days\\2019-Oct-19_odds.csv\n",
- "data/bf/days\\2019-Oct-20_odds.csv\n",
- "data/bf/days\\2019-Oct-21_odds.csv\n",
- "data/bf/days\\2019-Oct-22_odds.csv\n",
- "data/bf/days\\2019-Oct-23_odds.csv\n",
- "data/bf/days\\2019-Oct-24_odds.csv\n",
- "data/bf/days\\2019-Oct-25_odds.csv\n",
- "data/bf/days\\2019-Oct-26_odds.csv\n",
- "data/bf/days\\2019-Oct-27_odds.csv\n",
- "data/bf/days\\2019-Oct-28_odds.csv\n",
- "data/bf/days\\2019-Oct-29_odds.csv\n",
- "data/bf/days\\2019-Oct-30_odds.csv\n",
- "data/bf/days\\2019-Oct-31_odds.csv\n",
- "data/bf/days\\2019-Sep-01_odds.csv\n",
- "data/bf/days\\2019-Sep-02_odds.csv\n",
- "data/bf/days\\2019-Sep-03_odds.csv\n",
- "data/bf/days\\2019-Sep-04_odds.csv\n",
- "data/bf/days\\2019-Sep-05_odds.csv\n",
- "data/bf/days\\2019-Sep-06_odds.csv\n",
- "data/bf/days\\2019-Sep-07_odds.csv\n",
- "data/bf/days\\2019-Sep-08_odds.csv\n",
- "data/bf/days\\2019-Sep-09_odds.csv\n",
- "data/bf/days\\2019-Sep-10_odds.csv\n",
- "data/bf/days\\2019-Sep-11_odds.csv\n",
- "data/bf/days\\2019-Sep-12_odds.csv\n",
- "data/bf/days\\2019-Sep-13_odds.csv\n",
- "data/bf/days\\2019-Sep-14_odds.csv\n",
- "data/bf/days\\2019-Sep-15_odds.csv\n",
- "data/bf/days\\2019-Sep-16_odds.csv\n",
- "data/bf/days\\2019-Sep-17_odds.csv\n",
- "data/bf/days\\2019-Sep-18_odds.csv\n",
- "data/bf/days\\2019-Sep-19_odds.csv\n",
- "data/bf/days\\2019-Sep-20_odds.csv\n",
- "data/bf/days\\2019-Sep-21_odds.csv\n",
- "data/bf/days\\2019-Sep-22_odds.csv\n",
- "data/bf/days\\2019-Sep-23_odds.csv\n",
- "data/bf/days\\2019-Sep-24_odds.csv\n",
- "data/bf/days\\2019-Sep-25_odds.csv\n",
- "data/bf/days\\2019-Sep-26_odds.csv\n",
- "data/bf/days\\2019-Sep-27_odds.csv\n",
- "data/bf/days\\2019-Sep-28_odds.csv\n",
- "data/bf/days\\2019-Sep-29_odds.csv\n",
- "data/bf/days\\2019-Sep-30_odds.csv\n",
- "data/bf/days\\2020-Aug-01_odds.csv\n",
- "data/bf/days\\2020-Aug-02_odds.csv\n",
- "data/bf/days\\2020-Aug-03_odds.csv\n",
- "data/bf/days\\2020-Aug-04_odds.csv\n",
- "data/bf/days\\2020-Aug-05_odds.csv\n",
- "data/bf/days\\2020-Aug-06_odds.csv\n",
- "data/bf/days\\2020-Aug-07_odds.csv\n",
- "data/bf/days\\2020-Aug-08_odds.csv\n",
- "data/bf/days\\2020-Aug-09_odds.csv\n",
- "data/bf/days\\2020-Aug-10_odds.csv\n",
- "data/bf/days\\2020-Aug-11_odds.csv\n",
- "data/bf/days\\2020-Aug-12_odds.csv\n",
- "data/bf/days\\2020-Aug-13_odds.csv\n",
- "data/bf/days\\2020-Aug-14_odds.csv\n",
- "data/bf/days\\2020-Aug-15_odds.csv\n",
- "data/bf/days\\2020-Aug-16_odds.csv\n",
- "data/bf/days\\2020-Aug-17_odds.csv\n",
- "data/bf/days\\2020-Aug-18_odds.csv\n",
- "data/bf/days\\2020-Aug-19_odds.csv\n",
- "data/bf/days\\2020-Aug-20_odds.csv\n",
- "data/bf/days\\2020-Aug-21_odds.csv\n",
- "data/bf/days\\2020-Aug-22_odds.csv\n",
- "data/bf/days\\2020-Aug-23_odds.csv\n",
- "data/bf/days\\2020-Aug-24_odds.csv\n",
- "data/bf/days\\2020-Aug-25_odds.csv\n",
- "data/bf/days\\2020-Aug-26_odds.csv\n",
- "data/bf/days\\2020-Aug-27_odds.csv\n",
- "data/bf/days\\2020-Aug-28_odds.csv\n",
- "data/bf/days\\2020-Aug-29_odds.csv\n",
- "data/bf/days\\2020-Aug-30_odds.csv\n",
- "data/bf/days\\2020-Aug-31_odds.csv\n",
- "data/bf/days\\2020-Feb-01_odds.csv\n",
- "data/bf/days\\2020-Feb-02_odds.csv\n",
- "data/bf/days\\2020-Feb-03_odds.csv\n",
- "data/bf/days\\2020-Feb-04_odds.csv\n",
- "data/bf/days\\2020-Feb-05_odds.csv\n",
- "data/bf/days\\2020-Feb-06_odds.csv\n",
- "data/bf/days\\2020-Feb-07_odds.csv\n",
- "data/bf/days\\2020-Feb-08_odds.csv\n",
- "data/bf/days\\2020-Feb-09_odds.csv\n",
- "data/bf/days\\2020-Feb-10_odds.csv\n",
- "data/bf/days\\2020-Feb-11_odds.csv\n",
- "data/bf/days\\2020-Feb-12_odds.csv\n",
- "data/bf/days\\2020-Feb-13_odds.csv\n",
- "data/bf/days\\2020-Feb-14_odds.csv\n",
- "data/bf/days\\2020-Feb-15_odds.csv\n",
- "data/bf/days\\2020-Feb-16_odds.csv\n",
- "data/bf/days\\2020-Feb-17_odds.csv\n",
- "data/bf/days\\2020-Feb-18_odds.csv\n",
- "data/bf/days\\2020-Feb-19_odds.csv\n",
- "data/bf/days\\2020-Feb-20_odds.csv\n",
- "data/bf/days\\2020-Feb-21_odds.csv\n",
- "data/bf/days\\2020-Feb-22_odds.csv\n",
- "data/bf/days\\2020-Feb-23_odds.csv\n",
- "data/bf/days\\2020-Feb-24_odds.csv\n",
- "data/bf/days\\2020-Feb-25_odds.csv\n",
- "data/bf/days\\2020-Feb-26_odds.csv\n",
- "data/bf/days\\2020-Feb-27_odds.csv\n",
- "data/bf/days\\2020-Feb-28_odds.csv\n",
- "data/bf/days\\2020-Feb-29_odds.csv\n",
- "data/bf/days\\2020-Jan-01_odds.csv\n",
- "data/bf/days\\2020-Jan-02_odds.csv\n",
- "data/bf/days\\2020-Jan-03_odds.csv\n",
- "data/bf/days\\2020-Jan-04_odds.csv\n",
- "data/bf/days\\2020-Jan-05_odds.csv\n",
- "data/bf/days\\2020-Jan-06_odds.csv\n",
- "data/bf/days\\2020-Jan-07_odds.csv\n",
- "data/bf/days\\2020-Jan-08_odds.csv\n",
- "data/bf/days\\2020-Jan-09_odds.csv\n",
- "data/bf/days\\2020-Jan-10_odds.csv\n",
- "data/bf/days\\2020-Jan-11_odds.csv\n",
- "data/bf/days\\2020-Jan-12_odds.csv\n",
- "data/bf/days\\2020-Jan-13_odds.csv\n",
- "data/bf/days\\2020-Jan-14_odds.csv\n",
- "data/bf/days\\2020-Jan-15_odds.csv\n",
- "data/bf/days\\2020-Jan-16_odds.csv\n",
- "data/bf/days\\2020-Jan-17_odds.csv\n",
- "data/bf/days\\2020-Jan-18_odds.csv\n",
- "data/bf/days\\2020-Jan-19_odds.csv\n",
- "data/bf/days\\2020-Jan-20_odds.csv\n",
- "data/bf/days\\2020-Jan-21_odds.csv\n",
- "data/bf/days\\2020-Jan-22_odds.csv\n",
- "data/bf/days\\2020-Jan-23_odds.csv\n",
- "data/bf/days\\2020-Jan-24_odds.csv\n",
- "data/bf/days\\2020-Jan-25_odds.csv\n",
- "data/bf/days\\2020-Jan-26_odds.csv\n",
- "data/bf/days\\2020-Jan-27_odds.csv\n",
- "data/bf/days\\2020-Jan-28_odds.csv\n",
- "data/bf/days\\2020-Jan-29_odds.csv\n",
- "data/bf/days\\2020-Jan-30_odds.csv\n",
- "data/bf/days\\2020-Jan-31_odds.csv\n",
- "data/bf/days\\2020-Jul-01_odds.csv\n",
- "data/bf/days\\2020-Jul-02_odds.csv\n",
- "data/bf/days\\2020-Jul-03_odds.csv\n",
- "data/bf/days\\2020-Jul-04_odds.csv\n",
- "data/bf/days\\2020-Jul-05_odds.csv\n",
- "data/bf/days\\2020-Jul-06_odds.csv\n",
- "data/bf/days\\2020-Jul-07_odds.csv\n",
- "data/bf/days\\2020-Jul-08_odds.csv\n",
- "data/bf/days\\2020-Jul-09_odds.csv\n",
- "data/bf/days\\2020-Jul-10_odds.csv\n",
- "data/bf/days\\2020-Jul-11_odds.csv\n",
- "data/bf/days\\2020-Jul-12_odds.csv\n",
- "data/bf/days\\2020-Jul-13_odds.csv\n",
- "data/bf/days\\2020-Jul-14_odds.csv\n",
- "data/bf/days\\2020-Jul-15_odds.csv\n",
- "data/bf/days\\2020-Jul-16_odds.csv\n",
- "data/bf/days\\2020-Jul-17_odds.csv\n",
- "data/bf/days\\2020-Jul-18_odds.csv\n",
- "data/bf/days\\2020-Jul-19_odds.csv\n",
- "data/bf/days\\2020-Jul-20_odds.csv\n",
- "data/bf/days\\2020-Jul-21_odds.csv\n",
- "data/bf/days\\2020-Jul-22_odds.csv\n",
- "data/bf/days\\2020-Jul-23_odds.csv\n",
- "data/bf/days\\2020-Jul-24_odds.csv\n",
- "data/bf/days\\2020-Jul-25_odds.csv\n",
- "data/bf/days\\2020-Jul-26_odds.csv\n",
- "data/bf/days\\2020-Jul-27_odds.csv\n",
- "data/bf/days\\2020-Jul-28_odds.csv\n",
- "data/bf/days\\2020-Jul-29_odds.csv\n",
- "data/bf/days\\2020-Jul-30_odds.csv\n",
- "data/bf/days\\2020-Jul-31_odds.csv\n",
- "data/bf/days\\2020-Jun-21_odds.csv\n",
- "data/bf/days\\2020-Mar-01_odds.csv\n",
- "data/bf/days\\2020-Mar-02_odds.csv\n",
- "data/bf/days\\2020-Mar-03_odds.csv\n",
- "data/bf/days\\2020-Mar-04_odds.csv\n",
- "data/bf/days\\2020-Mar-05_odds.csv\n",
- "data/bf/days\\2020-Mar-06_odds.csv\n",
- "data/bf/days\\2020-Mar-07_odds.csv\n",
- "data/bf/days\\2020-Mar-08_odds.csv\n",
- "data/bf/days\\2020-Mar-09_odds.csv\n",
- "data/bf/days\\2020-Mar-10_odds.csv\n",
- "data/bf/days\\2020-Mar-11_odds.csv\n",
- "data/bf/days\\2020-Mar-12_odds.csv\n",
- "data/bf/days\\2020-Mar-13_odds.csv\n",
- "data/bf/days\\2020-Mar-14_odds.csv\n",
- "data/bf/days\\2020-Mar-15_odds.csv\n",
- "data/bf/days\\2020-Mar-16_odds.csv\n",
- "data/bf/days\\2020-Mar-17_odds.csv\n",
- "data/bf/days\\2020-Mar-18_odds.csv\n",
- "data/bf/days\\2020-Mar-19_odds.csv\n",
- "data/bf/days\\2020-Mar-20_odds.csv\n",
- "data/bf/days\\2020-Mar-21_odds.csv\n",
- "data/bf/days\\2020-Mar-22_odds.csv\n",
- "data/bf/days\\2020-Mar-23_odds.csv\n",
- "data/bf/days\\2020-Mar-24_odds.csv\n",
- "data/bf/days\\2020-Mar-25_odds.csv\n",
- "data/bf/days\\2020-Mar-26_odds.csv\n",
- "data/bf/days\\2020-Mar-27_odds.csv\n",
- "data/bf/days\\2020-Mar-28_odds.csv\n",
- "data/bf/days\\2020-Mar-29_odds.csv\n",
- "data/bf/days\\2020-Mar-30_odds.csv\n",
- "data/bf/days\\2020-Mar-31_odds.csv\n",
- "data/bf/days\\2020-Oct-01_odds.csv\n",
- "data/bf/days\\2020-Oct-02_odds.csv\n",
- "data/bf/days\\2020-Oct-03_odds.csv\n",
- "data/bf/days\\2020-Oct-04_odds.csv\n",
- "data/bf/days\\2020-Oct-05_odds.csv\n",
- "data/bf/days\\2020-Oct-06_odds.csv\n",
- "data/bf/days\\2020-Oct-07_odds.csv\n",
- "data/bf/days\\2020-Oct-08_odds.csv\n",
- "data/bf/days\\2020-Oct-09_odds.csv\n",
- "data/bf/days\\2020-Oct-10_odds.csv\n",
- "data/bf/days\\2020-Oct-11_odds.csv\n",
- "data/bf/days\\2020-Oct-12_odds.csv\n",
- "data/bf/days\\2020-Oct-13_odds.csv\n",
- "data/bf/days\\2020-Oct-14_odds.csv\n",
- "data/bf/days\\2020-Oct-15_odds.csv\n",
- "data/bf/days\\2020-Oct-16_odds.csv\n",
- "data/bf/days\\2020-Oct-17_odds.csv\n",
- "data/bf/days\\2020-Oct-18_odds.csv\n",
- "data/bf/days\\2020-Oct-19_odds.csv\n",
- "data/bf/days\\2020-Oct-20_odds.csv\n",
- "data/bf/days\\2020-Oct-21_odds.csv\n",
- "data/bf/days\\2020-Oct-22_odds.csv\n",
- "data/bf/days\\2020-Oct-23_odds.csv\n",
- "data/bf/days\\2020-Oct-24_odds.csv\n",
- "data/bf/days\\2020-Oct-25_odds.csv\n",
- "data/bf/days\\2020-Oct-26_odds.csv\n",
- "data/bf/days\\2020-Oct-27_odds.csv\n",
- "data/bf/days\\2020-Oct-28_odds.csv\n",
- "data/bf/days\\2020-Oct-29_odds.csv\n",
- "data/bf/days\\2020-Oct-30_odds.csv\n",
- "data/bf/days\\2020-Oct-31_odds.csv\n",
- "data/bf/days\\2020-Sep-01_odds.csv\n",
- "data/bf/days\\2020-Sep-02_odds.csv\n",
- "data/bf/days\\2020-Sep-03_odds.csv\n",
- "data/bf/days\\2020-Sep-04_odds.csv\n",
- "data/bf/days\\2020-Sep-05_odds.csv\n",
- "data/bf/days\\2020-Sep-06_odds.csv\n",
- "data/bf/days\\2020-Sep-07_odds.csv\n",
- "data/bf/days\\2020-Sep-08_odds.csv\n",
- "data/bf/days\\2020-Sep-09_odds.csv\n",
- "data/bf/days\\2020-Sep-10_odds.csv\n",
- "data/bf/days\\2020-Sep-11_odds.csv\n",
- "data/bf/days\\2020-Sep-12_odds.csv\n",
- "data/bf/days\\2020-Sep-13_odds.csv\n",
- "data/bf/days\\2020-Sep-14_odds.csv\n",
- "data/bf/days\\2020-Sep-15_odds.csv\n",
- "data/bf/days\\2020-Sep-16_odds.csv\n",
- "data/bf/days\\2020-Sep-17_odds.csv\n",
- "data/bf/days\\2020-Sep-18_odds.csv\n",
- "data/bf/days\\2020-Sep-19_odds.csv\n",
- "data/bf/days\\2020-Sep-20_odds.csv\n",
- "data/bf/days\\2020-Sep-21_odds.csv\n",
- "data/bf/days\\2020-Sep-22_odds.csv\n",
- "data/bf/days\\2020-Sep-23_odds.csv\n",
- "data/bf/days\\2020-Sep-24_odds.csv\n",
- "data/bf/days\\2020-Sep-25_odds.csv\n",
- "data/bf/days\\2020-Sep-26_odds.csv\n",
- "data/bf/days\\2020-Sep-27_odds.csv\n",
- "data/bf/days\\2020-Sep-28_odds.csv\n",
- "data/bf/days\\2020-Sep-29_odds.csv\n",
- "data/bf/days\\2020-Sep-30_odds.csv\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"cols=['eventId','clk','ltp','id','ip']\n",
"cols_noid=['eventId','clk','ltp','ip']\n",
@@ -2244,7 +1269,7 @@
},
{
"cell_type": "code",
- "execution_count": 181,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
diff --git a/data.ipynb b/data.ipynb
new file mode 100644
index 0000000..e9a9390
--- /dev/null
+++ b/data.ipynb
@@ -0,0 +1,1967 @@
+{
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5-final"
+ },
+ "orig_nbformat": 2,
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.5 64-bit ('mlenv': conda)",
+ "metadata": {
+ "interpreter": {
+ "hash": "12f2fd9a8da6c9ddda222d67ff20ee53b82617d5a9ac88eb47f60b586ce1b05e"
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import glob\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import pickle\n",
+ "import pytz\n",
+ "from datetime import timezone,datetime,timedelta\n",
+ "from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler\n",
+ "\n",
+ "import api.util\n",
+ "#from api.data_collector import DataCollector\n",
+ "from api.sofa_dp import SofaDataProvider\n",
+ "from api.op_dp import OpDataProvider\n",
+ "\n",
+ "from IPython.display import display\n",
+ "pd.options.display.max_columns = None\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class DataCollector:\n",
+ " def __init__(self):\n",
+ " self.LOCAL_TZ = 'Asia/Almaty'\n",
+ " self.SERVER_TZ = 'UTC'\n",
+ " self.DATA_PATH='data/'\n",
+ " self.ELO_DATA_PATH='data/elo/'\n",
+ " self.PREREQUISITES_PATH='prerequisites/'\n",
+ " self.COL_CAT=[]\n",
+ " self.COL_NUM=[]\n",
+ " self.COL_LBL=[]\n",
+ " self.COL_INF=[]\n",
+ " \n",
+ " def _load_prerequisites(self,name):\n",
+ " with open(os.path.join(self.PREREQUISITES_PATH, name),'rb') as f:\n",
+ " encoder = pickle.load(f)\n",
+ " return encoder\n",
+ " \n",
+ " def _save_prerequisite(self, name, data):\n",
+ " folder='prerequisites/'\n",
+ " os.makedirs(self.PREREQUISITES_PATH, mode=0o777, exist_ok=True)\n",
+ " with open(os.path.join(self.PREREQUISITES_PATH, name), mode='wb') as f:\n",
+ " pickle.dump(data, f) \n",
+ "\n",
+ " def _ff(self, columns):\n",
+ " if len(self.INCLUDE)>0:\n",
+ " return [x for x in columns if x in self.INCLUDE]\n",
+ " else:\n",
+ " return [x for x in columns if x not in self.EXCLUDE]\n",
+ " \n",
+ " def _encode(self, enctype, features, outs, df):\n",
+ " if (len(self.INCLUDE)>0 and outs[0] in self.INCLUDE) or outs[0] in self.EXCLUDE:\n",
+ " return df\n",
+ " name='_'.join(features)\n",
+ " if self.LOAD:\n",
+    "            encoder=self._load_prerequisites(f'{enctype}_{name}')\n",
+ " else:\n",
+ " if enctype=='sc':\n",
+ " encoder = MinMaxScaler()\n",
+ " elif enctype=='le':\n",
+ " encoder = LabelEncoder()\n",
+ " elif enctype=='ohe':\n",
+ " encoder = OneHotEncoder()\n",
+ " if len(features)==1:\n",
+ " encoder.fit(df[features].values)\n",
+ " else:\n",
+ " encoder.fit(pd.concat([pd.DataFrame(df[features[0]].unique(), columns=[name]),pd.DataFrame(df[features[1]].unique(), columns=[name])])[name])\n",
+ " self._save_prerequisite(f'{enctype}_{name}', encoder)\n",
+ " if enctype=='ohe':\n",
+ " return encoder.transform(df[features].values).toarray()\n",
+ " if len(features)==1:\n",
+ " df[outs[0]] = encoder.transform(df[features].values)\n",
+ " else:\n",
+ " df[outs[0]] = encoder.transform(df[features[0]])\n",
+ " df[outs[1]] = encoder.transform(df[features[1]])\n",
+ " return df\n",
+ "\n",
+ " def _encode_teams(self, df):\n",
+ " teams_name=self.ELO_DATA_PATH+'teams.csv'\n",
+ " teams_saved=pd.read_csv(teams_name, index_col=None)\n",
+ " teams=df[['team']].dropna().drop_duplicates()\n",
+ " teams_new=teams[~teams.team.isin(teams_saved.team)]\n",
+ " print(teams_new)\n",
+ " if not teams_new.empty:\n",
+ " print('New teams!')\n",
+ " id=teams_saved.id.max()+1\n",
+ " #id=0\n",
+ " teams_list=[]\n",
+ " for row in teams_new.itertuples():\n",
+ " if len(row.team)>1:\n",
+ " teams_list.append({'team':row.team, 'id':id})\n",
+ " id+=1\n",
+ " #break\n",
+ " teams_saved=pd.concat([teams_saved,pd.DataFrame(teams_list)])\n",
+ " teams_saved.id=teams_saved.id.astype(int)\n",
+ " teams_saved.to_csv(teams_name, index=False)\n",
+ " df=df.merge(teams_saved, on='team', how='left')\n",
+ " return df\n",
+ " \n",
+ " def _add_elo(self, df_src,df_elo):\n",
+ " df_teams=pd.read_csv(self.DATA_PATH+'teams.csv', index_col=None)\n",
+ " df_elo_merged=df_elo.merge(df_teams[['id','tid']], on='id', how='left').drop_duplicates()\n",
+ " df_elo_merged=df_elo_merged.dropna()\n",
+ " df_src['de']=df_src.ds.apply(lambda x: x.strftime('%Y-%m-%d'))\n",
+ " df_elo_merged=df_elo_merged.rename(columns={'tid':'tid1', 'elo':'elo1'})\n",
+ " df_src=df_src.merge(df_elo_merged[['tid1','de','elo1']], on=['tid1','de'], how='left')\n",
+ " df_elo_merged=df_elo_merged.rename(columns={'tid1':'tid2', 'elo1':'elo2'})\n",
+ " df_src=df_src.merge(df_elo_merged[['tid2','de','elo2']], on=['tid2','de'], how='left')\n",
+ " return df_src\n",
+ "\n",
+ " def _provide_elo(self):\n",
+ " df = pd.concat(map(pd.read_csv, glob.glob(os.path.join(self.DATA_PATH+'elo/', 'elo_*.csv'))))\n",
+ " df=df[['Club', 'Country', 'Level', 'Elo', 'From', 'To']]\n",
+ " df.columns=['team', 'country', 'level', 'elo', 'ds', 'de']\n",
+ " df=self._encode_teams(df)\n",
+ " return df\n",
+ "\n",
+ " def _provide_sofa(self):\n",
+ " dp=SofaDataProvider(load=True)\n",
+ " df=dp._load_data()\n",
+ " return df.drop_duplicates(subset='mid', keep='last')\n",
+ "\n",
+ " def _provide_op(self):\n",
+ " dp=OpDataProvider(load=True)\n",
+ " df=dp._load_data()\n",
+ " return df\n",
+ "\n",
+ " def _bind_sofa_op(self,df):\n",
+ " df_op=self._provide_op()\n",
+ " df_binds=pd.read_csv('data/binds_ss_op.csv', index_col=None)\n",
+ " df_op=df_op.merge(df_binds[['op_mid','mid']], left_on='mid', right_on='op_mid')\n",
+ " return df.merge(df_op[['mid_y','odds_away','odds_draw','odds_home','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']], left_on='mid', right_on='mid_y', how='left')\n",
+ "\n",
+ " def _load_data(self):\n",
+ " #df_op=self._provide_sofa()\n",
+ " df_sofa=self._provide_sofa()\n",
+ " df_elo=self._provide_elo()\n",
+ " df_sofa=self._add_elo(df_sofa,df_elo)\n",
+ " df_sofa=self._bind_sofa_op(df_sofa)\n",
+ " return df_sofa\n",
+ " \n",
+ " def provide_data(self, double=True):\n",
+ " df=self._load_data()\n",
+ " df['ps_ft']=df.sc1-df.sc2\n",
+ " df['ps_ht']=df.homeScoreHT-df.awayScoreHT\n",
+ " df['w1']=np.where(df.winner=='home',1,0)\n",
+ " df['wx']=np.where(df.winner=='draw',1,0)\n",
+ " df['w2']=np.where(df.winner=='away',1,0)\n",
+ " df_home=df.copy()\n",
+ " df_home=df_home.rename(columns={'homeScoreHT':'ht1','awayScoreHT':'ht2','sc1':'ft1','sc2':'ft2','vote_home':'vote1','vote_draw':'votex','vote_away':'vote2','home_formation':'form1','away_formation':'form2'})\n",
+ " if double:\n",
+ " df_home['side']=1\n",
+ " df_away=df.copy()\n",
+ " df_away['side']=0\n",
+ " df_away=df_away.rename(columns={'homeScoreHT':'ht2','awayScoreHT':'ht1','sc1':'ft2','sc2':'ft1','vote_home':'vote2','vote_draw':'votex','vote_away':'vote1',\n",
+ " 'home_formation':'form2','away_formation':'form1','w1':'w2','w2':'w1','elo1':'elo2','elo2':'elo1','t1':'t2','t2':'t1',\n",
+ " 'tid1':'tid2','tid2':'tid1','odds_away':'odds_home','odds_home':'odds_away','oddsprob_home':'oddsprob_away',\n",
+ " 'oddsprob_away':'oddsprob_home','drift_home':'drift_away','drift_away':'drift_home'})\n",
+ " df_away['ps_ft']=df_away['ps_ft']*-1\n",
+ " df_away['ps_ht']=df_away['ps_ht']*-1\n",
+ "\n",
+ " df_home=pd.concat([df_home,df_away], axis=0)\n",
+ "\n",
+ " return df_home.reset_index(drop=True)"
+ ]
+ },
+ {
+ "source": [
+ "dp=DataCollector()\n",
+ "dp._provide_op()"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Empty DataFrame\nColumns: [team]\nIndex: []\n"
+ ]
+ }
+ ],
+ "source": [
+ "dp=DataCollector()\n",
+ "#df=dp.provide_data(double=False)\n",
+ "df=dp.provide_data()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#['side', 'country', 'country_id', 'liga', 'mid', 'round', 'ds', 'de', 'tid1', 'tid2', 't1', 't2', 'w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', \n",
+ "# 'winner', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']\n",
+ "COL_CUR=['country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']\n",
+ "COL_PREV=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft']\n",
+ "COL_CAT=['country_id','form1', 'form2']\n",
+ "COL_BIN=['side']\n",
+ "COL_NUM=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']\n",
+ "COL_INF=['side','country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2', 'ft1', 'ft2','winner','odds_away','odds_draw','odds_home']\n",
+ "\n",
+ "#df=pd.read_csv('data/matches.csv', index_col=None)\n",
+ "#df['elo1'].fillna((df['elo1'].mean()), inplace=True)\n",
+ "#df['elo2'].fillna((df['elo2'].mean()), inplace=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def calc_stat(df_, group_feature, name, cols):\n",
+ " df_=df_.sort_values(by='ds')\n",
+ " cols_out=['ds','mid','tid']\n",
+ " df_['C']=df_.groupby([group_feature])['N'].apply(lambda x : x.shift().cumsum())#.cumsum()#-1\n",
+ " #display(df_)\n",
+ " for col in cols:\n",
+ " print(col)\n",
+ " cols_out.append(f'{col}_{name}_avg')\n",
+ " df_[f'{col}_{name}_sum']=df_.groupby([group_feature])[col].apply(lambda x : x.shift().cumsum())#.cumsum()#-df_teams[col]\n",
+ " df_[f'{col}_{name}_avg']=df_[f'{col}_{name}_sum']/df_['C']\n",
+ " df_.drop(columns=[f'{col}_{name}_sum'], inplace=True)\n",
+ " #cols_out.append(f'{col}_{name}_sum')\n",
+ " df_=df_.rename(columns={group_feature:'tid'})\n",
+ " return df_[cols_out]\n",
+ "\n",
+ "def calc_stat_n(df_, group_feature, name, cols, n):\n",
+ " df_=df_.sort_values(by='ds')\n",
+ " cols_out=['ds','mid','tid']\n",
+ " df_['C']=df_.groupby([group_feature])['N'].apply(lambda x : x.shift().rolling(min_periods=1, window=n).sum())\n",
+ " #display(df_)\n",
+ " for col in cols:\n",
+ " print(col)\n",
+ " cols_out.append(f'{col}_{name}_{n}')\n",
+ " df_[f'{col}_{name}_sum']=df_.groupby([group_feature])[col].apply(lambda x : x.shift().rolling(min_periods=1, window=n).sum())\n",
+ " df_[f'{col}_{name}_{n}']=df_[f'{col}_{name}_sum']/df_['C']\n",
+ " df_.drop(columns=[f'{col}_{name}_sum'], inplace=True)\n",
+ " #cols_out.append(f'{col}_{name}_sum')\n",
+ " df_=df_.rename(columns={group_feature:'tid'})\n",
+ " return df_[cols_out]"
+ ]
+ },
+ {
+ "source": [
+ "df['N']=1\n",
+ "cols_cum=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', 'vote1', 'votex', 'vote2', 'elo1', 'elo2','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']\n",
+ "df_all=calc_stat(df, 'tid1', 'tt',cols_cum).sort_values(by=['mid','tid'])\n",
+ "df_all3=calc_stat_n(df, 'tid1', 'tt',cols_cum,3).sort_values(by=['mid','tid'])\n",
+ "colnames=[x.replace('_avg','_form') for x in df_all.columns[3:]]\n",
+ "res=df_all3[df_all3.columns[3:]].values-df_all[df_all.columns[3:]].values\n",
+    "df_form=pd.DataFrame(res,columns=colnames,index=df_all.index)\n",
+ "df_all=pd.concat([df_all,df_form], axis=1)\n",
+ "\n",
+ "df_home=calc_stat(df[df['side']==1], 'tid1', 'ts', cols_cum).sort_values(by=['mid','tid'])\n",
+ "df_home3=calc_stat_n(df[df['side']==1], 'tid1', 'ts', cols_cum,3).sort_values(by=['mid','tid'])\n",
+ "colnames=[x.replace('_avg','_form') for x in df_home.columns[3:]]\n",
+ "res=df_home3[df_home3.columns[3:]].values-df_home[df_home.columns[3:]].values\n",
+    "df_form=pd.DataFrame(res,columns=colnames,index=df_home.index)\n",
+ "df_home=pd.concat([df_home,df_form], axis=1)\n",
+ "\n",
+ "df_away=calc_stat(df[df['side']==0], 'tid1', 'ts', cols_cum).reset_index(drop=True).sort_values(by=['mid','tid'])\n",
+ "df_away3=calc_stat_n(df[df['side']==0], 'tid1', 'ts', cols_cum,3).sort_values(by=['mid','tid'])\n",
+ "colnames=[x.replace('_avg','_form') for x in df_away.columns[3:]]\n",
+ "res=df_away3[df_away3.columns[3:]].values-df_away[df_away.columns[3:]].values\n",
+    "df_form=pd.DataFrame(res,columns=colnames,index=df_away.index)\n",
+ "df_away=pd.concat([df_away,df_form], axis=1)\n",
+ "\n",
+ "#df_home=calc_stat(df, 'tid1', 'th', cols_cum)\n",
+ "#df_away=calc_stat(df, 'tid2', 'ta', cols_cum)\n",
+ "\n",
+ "#df_all.to_csv('data/teams_stats_all.csv', index=False)\n",
+ "#df_home.to_csv('data/teams_stats_home.csv', index=False)\n",
+ "#df_away.to_csv('data/teams_stats_away.csv', index=False)\n",
+ "#df_all3.to_csv('data/teams_stats_all3.csv', index=False)\n",
+ "#df_home3.to_csv('data/teams_stats_home3.csv', index=False)\n",
+ "#df_away3.to_csv('data/teams_stats_away3.csv', index=False)"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n",
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n",
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n",
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n",
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n",
+ "w1\n",
+ "wx\n",
+ "w2\n",
+ "ht1\n",
+ "ht2\n",
+ "ft1\n",
+ "ft2\n",
+ "ps_ht\n",
+ "ps_ft\n",
+ "vote1\n",
+ "votex\n",
+ "vote2\n",
+ "elo1\n",
+ "elo2\n",
+ "oddsprob_home\n",
+ "oddsprob_draw\n",
+ "oddsprob_away\n",
+ "drift_home\n",
+ "drift_away\n",
+ "drift_draw\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " n\n",
+ "elo1_ts_avg 24835\n",
+ "elo2_ts_avg 24910\n",
+ "oddsprob_home_ts_avg 36320\n",
+ "oddsprob_draw_ts_avg 36320\n",
+ "oddsprob_away_ts_avg 36320\n",
+ "drift_home_ts_avg 36320\n",
+ "drift_away_ts_avg 36320\n",
+ "drift_draw_ts_avg 36320\n",
+ "elo1_ts_form 24835\n",
+ "elo2_ts_form 24910\n",
+ "oddsprob_home_ts_form 36320\n",
+ "oddsprob_draw_ts_form 36320\n",
+ "oddsprob_away_ts_form 36320\n",
+ "drift_home_ts_form 36320\n",
+ "drift_away_ts_form 36320\n",
+ "drift_draw_ts_form 36320"
+ ],
+ "text/html": "
\n\n
\n \n \n \n n \n \n \n \n \n elo1_ts_avg \n 24835 \n \n \n elo2_ts_avg \n 24910 \n \n \n oddsprob_home_ts_avg \n 36320 \n \n \n oddsprob_draw_ts_avg \n 36320 \n \n \n oddsprob_away_ts_avg \n 36320 \n \n \n drift_home_ts_avg \n 36320 \n \n \n drift_away_ts_avg \n 36320 \n \n \n drift_draw_ts_avg \n 36320 \n \n \n elo1_ts_form \n 24835 \n \n \n elo2_ts_form \n 24910 \n \n \n oddsprob_home_ts_form \n 36320 \n \n \n oddsprob_draw_ts_form \n 36320 \n \n \n oddsprob_away_ts_form \n 36320 \n \n \n drift_home_ts_form \n 36320 \n \n \n drift_away_ts_form \n 36320 \n \n \n drift_draw_ts_form \n 36320 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 39
+ }
+ ],
+ "source": [
+ "# Count the missing values in every column of df_away (one row per column, count in column n),\n",
+ "# then show only the columns with more than 10000 missing values.\n",
+ "nulls=df_away.isna().sum().to_frame(name='n')\n",
+ "nulls[nulls['n']>10000]"
+ ]
+ },
+ {
+ "source": [
+ "df_all=pd.read_csv('data/teams_stats_all.csv', index_col=None)\n",
+ "df_home=pd.read_csv('data/teams_stats_home.csv', index_col=None)\n",
+ "df_away=pd.read_csv('data/teams_stats_away.csv', index_col=None)"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ht2 country country_id ds ht1 liga \\\n",
+ "135578 1.0 romania 34 2021-01-10 17:00:00+00:00 1.0 liga-i \n",
+ "\n",
+ " mid round ft1 ft2 t1 t2 \\\n",
+ "135578 9270007 12 1.0 2.0 fc hermannstadt fc viitorul constanta \n",
+ "\n",
+ " tid1 tid2 winner formation_h formation_a form1 form2 vote1 \\\n",
+ "135578 1499 594 home 4-3-3 4-2-3-1 14 18 0.068589 \n",
+ "\n",
+ " votex vote2 pop_r de elo1 elo2 \\\n",
+ "135578 0.307603 0.623808 0 2021-01-10 1285.478027 1352.687866 \n",
+ "\n",
+ " mid_y odds_away odds_draw odds_home oddsprob_home oddsprob_draw \\\n",
+ "135578 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ " oddsprob_away drift_home drift_away drift_draw ps_ft ps_ht w1 \\\n",
+ "135578 NaN NaN NaN NaN -1.0 -0.0 0 \n",
+ "\n",
+ " wx w2 side N \n",
+ "135578 0 1 0 1 "
+ ],
+ "text/html": "
\n\n
\n \n \n \n ht2 \n country \n country_id \n ds \n ht1 \n liga \n mid \n round \n ft1 \n ft2 \n t1 \n t2 \n tid1 \n tid2 \n winner \n formation_h \n formation_a \n form1 \n form2 \n vote1 \n votex \n vote2 \n pop_r \n de \n elo1 \n elo2 \n mid_y \n odds_away \n odds_draw \n odds_home \n oddsprob_home \n oddsprob_draw \n oddsprob_away \n drift_home \n drift_away \n drift_draw \n ps_ft \n ps_ht \n w1 \n wx \n w2 \n side \n N \n \n \n \n \n 135578 \n 1.0 \n romania \n 34 \n 2021-01-10 17:00:00+00:00 \n 1.0 \n liga-i \n 9270007 \n 12 \n 1.0 \n 2.0 \n fc hermannstadt \n fc viitorul constanta \n 1499 \n 594 \n home \n 4-3-3 \n 4-2-3-1 \n 14 \n 18 \n 0.068589 \n 0.307603 \n 0.623808 \n 0 \n 2021-01-10 \n 1285.478027 \n 1352.687866 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -1.0 \n -0.0 \n 0 \n 0 \n 1 \n 0 \n 1 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ],
+ "source": [
+ "df.iloc[[-2]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " ds mid tid w1_tt_3 wx_tt_3 w2_tt_3 \\\n67788 2021-01-10 17:00:00+00:00 9270007 594 0.000000 0.333333 0.666667 \n135578 2021-01-10 17:00:00+00:00 9270007 1499 0.333333 0.333333 0.333333 \n\n ht1_tt_3 ht2_tt_3 ft1_tt_3 ft2_tt_3 ps_ht_tt_3 ps_ft_tt_3 \\\n67788 0.333333 0.333333 0.666667 1.333333 0.0 -0.666667 \n135578 0.666667 0.666667 1.666667 1.666667 0.0 0.000000 \n\n vote1_tt_3 votex_tt_3 vote2_tt_3 elo1_tt_3 elo2_tt_3 \\\n67788 0.494228 0.333722 0.172050 1368.772664 1292.680379 \n135578 0.383013 0.383214 0.233773 1295.400635 827.787150 \n\n oddsprob_home_tt_3 oddsprob_draw_tt_3 oddsprob_away_tt_3 \\\n67788 NaN NaN NaN \n135578 NaN NaN NaN \n\n drift_home_tt_3 drift_away_tt_3 drift_draw_tt_3 \n67788 NaN NaN NaN \n135578 NaN NaN NaN ",
+ "text/html": "
\n\n
\n \n \n \n ds \n mid \n tid \n w1_tt_3 \n wx_tt_3 \n w2_tt_3 \n ht1_tt_3 \n ht2_tt_3 \n ft1_tt_3 \n ft2_tt_3 \n ps_ht_tt_3 \n ps_ft_tt_3 \n vote1_tt_3 \n votex_tt_3 \n vote2_tt_3 \n elo1_tt_3 \n elo2_tt_3 \n oddsprob_home_tt_3 \n oddsprob_draw_tt_3 \n oddsprob_away_tt_3 \n drift_home_tt_3 \n drift_away_tt_3 \n drift_draw_tt_3 \n \n \n \n \n 67788 \n 2021-01-10 17:00:00+00:00 \n 9270007 \n 594 \n 0.000000 \n 0.333333 \n 0.666667 \n 0.333333 \n 0.333333 \n 0.666667 \n 1.333333 \n 0.0 \n -0.666667 \n 0.494228 \n 0.333722 \n 0.172050 \n 1368.772664 \n 1292.680379 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n 135578 \n 2021-01-10 17:00:00+00:00 \n 9270007 \n 1499 \n 0.333333 \n 0.333333 \n 0.333333 \n 0.666667 \n 0.666667 \n 1.666667 \n 1.666667 \n 0.0 \n 0.000000 \n 0.383013 \n 0.383214 \n 0.233773 \n 1295.400635 \n 827.787150 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " ds mid tid w1_ts_3 wx_ts_3 w2_ts_3 \\\n67788 2021-01-10 17:00:00+00:00 9270007 594 0.333333 0.333333 0.333333 \n\n ht1_ts_3 ht2_ts_3 ft1_ts_3 ft2_ts_3 ps_ht_ts_3 ps_ft_ts_3 \\\n67788 0.0 0.666667 1.666667 1.666667 -0.666667 0.0 \n\n vote1_ts_3 votex_ts_3 vote2_ts_3 elo1_ts_3 elo2_ts_3 \\\n67788 0.634685 0.26855 0.096765 1384.676554 884.928385 \n\n oddsprob_home_ts_3 oddsprob_draw_ts_3 oddsprob_away_ts_3 \\\n67788 NaN NaN NaN \n\n drift_home_ts_3 drift_away_ts_3 drift_draw_ts_3 \n67788 NaN NaN NaN ",
+ "text/html": "
\n\n
\n \n \n \n ds \n mid \n tid \n w1_ts_3 \n wx_ts_3 \n w2_ts_3 \n ht1_ts_3 \n ht2_ts_3 \n ft1_ts_3 \n ft2_ts_3 \n ps_ht_ts_3 \n ps_ft_ts_3 \n vote1_ts_3 \n votex_ts_3 \n vote2_ts_3 \n elo1_ts_3 \n elo2_ts_3 \n oddsprob_home_ts_3 \n oddsprob_draw_ts_3 \n oddsprob_away_ts_3 \n drift_home_ts_3 \n drift_away_ts_3 \n drift_draw_ts_3 \n \n \n \n \n 67788 \n 2021-01-10 17:00:00+00:00 \n 9270007 \n 594 \n 0.333333 \n 0.333333 \n 0.333333 \n 0.0 \n 0.666667 \n 1.666667 \n 1.666667 \n -0.666667 \n 0.0 \n 0.634685 \n 0.26855 \n 0.096765 \n 1384.676554 \n 884.928385 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " ds mid tid w1_ts_3 wx_ts_3 w2_ts_3 \\\n135578 2021-01-10 17:00:00+00:00 9270007 1499 0.0 0.666667 0.333333 \n\n ht1_ts_3 ht2_ts_3 ft1_ts_3 ft2_ts_3 ps_ht_ts_3 ps_ft_ts_3 \\\n135578 0.333333 1.333333 1.0 2.666667 -1.0 -1.666667 \n\n vote1_ts_3 votex_ts_3 vote2_ts_3 elo1_ts_3 elo2_ts_3 \\\n135578 0.126092 0.291419 0.582489 1308.825358 936.720337 \n\n oddsprob_home_ts_3 oddsprob_draw_ts_3 oddsprob_away_ts_3 \\\n135578 NaN NaN NaN \n\n drift_home_ts_3 drift_away_ts_3 drift_draw_ts_3 \n135578 NaN NaN NaN ",
+ "text/html": "
\n\n
\n \n \n \n ds \n mid \n tid \n w1_ts_3 \n wx_ts_3 \n w2_ts_3 \n ht1_ts_3 \n ht2_ts_3 \n ft1_ts_3 \n ft2_ts_3 \n ps_ht_ts_3 \n ps_ft_ts_3 \n vote1_ts_3 \n votex_ts_3 \n vote2_ts_3 \n elo1_ts_3 \n elo2_ts_3 \n oddsprob_home_ts_3 \n oddsprob_draw_ts_3 \n oddsprob_away_ts_3 \n drift_home_ts_3 \n drift_away_ts_3 \n drift_draw_ts_3 \n \n \n \n \n 135578 \n 2021-01-10 17:00:00+00:00 \n 9270007 \n 1499 \n 0.0 \n 0.666667 \n 0.333333 \n 0.333333 \n 1.333333 \n 1.0 \n 2.666667 \n -1.0 \n -1.666667 \n 0.126092 \n 0.291419 \n 0.582489 \n 1308.825358 \n 936.720337 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# Spot-check a single match: its match id and the ids of its two teams.\n",
+ "id, tid1, tid2 = 9270007, 594, 1499\n",
+ "# Show the rows of that match in each of the three n=3 statistics frames, in the same order as before.\n",
+ "for stats_n in (df_all3, df_home3, df_away3):\n",
+ "    display(stats_n[stats_n['mid']==id])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For each of the two team ids, write every row of df where that team appears as tid1 or tid2 to its own CSV.\n",
+ "for team_id, out_path in ((tid1, 'data/t1.csv'), (tid2, 'data/t2.csv')):\n",
+ "    involved=(df['tid1']==team_id)|(df['tid2']==team_id)\n",
+ "    df[involved].to_csv(out_path, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the feature table: every row of df gets the statistics of its own team (tid1, prefix tar_)\n",
+ "# and of the other team (tid2, prefix opp_), plus their differences, and is written to data/stats_generated.csv.\n",
+ "def _merge_stats(left, stats, team_col):\n",
+ "    # Left-join the statistics (all columns of stats except the first) of the team named by team_col.\n",
+ "    # The join key tid is dropped immediately: keeping it makes repeated merges collide on tid and\n",
+ "    # produce duplicated tid_x/tid_y columns, which newer pandas versions reject with a MergeError.\n",
+ "    merged=left.merge(stats[stats.columns[1:]], left_on=['mid',team_col], right_on=['mid','tid'], how='left')\n",
+ "    return merged.drop(columns='tid')\n",
+ "\n",
+ "# Prefixed copies of the statistics frames; the first three columns keep their names.\n",
+ "df_all_target=df_all.copy()\n",
+ "df_all_target.columns=np.hstack([df_all.columns[:3],['tar_'+x for x in df_all.columns[3:]]])\n",
+ "df_all_opponent=df_all.copy()\n",
+ "df_all_opponent.columns=np.hstack([df_all.columns[:3],['opp_'+x for x in df_all.columns[3:]]])\n",
+ "\n",
+ "df_home_target=df_home.copy()\n",
+ "df_home_target.columns=np.hstack([df_home.columns[:3],['tar_'+x.replace('_th_','_ts_') for x in df_home.columns[3:]]])\n",
+ "df_away_opponent=df_away.copy()\n",
+ "df_away_opponent.columns=np.hstack([df_away.columns[:3],['opp_'+x.replace('_ta_','_ts_') for x in df_away.columns[3:]]])\n",
+ "\n",
+ "df_away_target=df_away.copy()\n",
+ "df_away_target.columns=np.hstack([df_away.columns[:3],['tar_'+x.replace('_ta_','_ts_') for x in df_away.columns[3:]]])\n",
+ "df_home_opponent=df_home.copy()\n",
+ "df_home_opponent.columns=np.hstack([df_home.columns[:3],['opp_'+x.replace('_th_','_ts_') for x in df_home.columns[3:]]])\n",
+ "\n",
+ "# Rows with side==1: overall stats for both teams, df_home stats for tid1, df_away stats for tid2.\n",
+ "df_1=df[df['side']==1][COL_INF+COL_CUR]\n",
+ "df_1=_merge_stats(df_1, df_all_target, 'tid1')\n",
+ "df_1=_merge_stats(df_1, df_all_opponent, 'tid2')\n",
+ "df_1=_merge_stats(df_1, df_home_target, 'tid1')\n",
+ "df_1=_merge_stats(df_1, df_away_opponent, 'tid2')\n",
+ "\n",
+ "# Rows with side==0: overall stats for both teams, df_away stats for tid1, df_home stats for tid2.\n",
+ "df_2=df[df['side']==0][COL_INF+COL_CUR]\n",
+ "df_2=_merge_stats(df_2, df_all_target, 'tid1')\n",
+ "df_2=_merge_stats(df_2, df_all_opponent, 'tid2')\n",
+ "df_2=_merge_stats(df_2, df_away_target, 'tid1')\n",
+ "df_2=_merge_stats(df_2, df_home_opponent, 'tid2')\n",
+ "\n",
+ "df_=pd.concat([df_1,df_2], axis=0)\n",
+ "\n",
+ "# tar_ and opp_ columns come in the same order, so they can be subtracted positionally.\n",
+ "cols_tar=[x for x in df_.columns if 'tar_' in x]\n",
+ "cols_opp=[x for x in df_.columns if 'opp_' in x]\n",
+ "cols_diff=[x.replace('tar_','_diff_') for x in cols_tar]\n",
+ "# Fresh 0..n-1 index so the difference frame (default RangeIndex) lines up row by row in the concat.\n",
+ "df_.reset_index(drop=True, inplace=True)\n",
+ "df_=pd.concat([df_,pd.DataFrame(df_[cols_tar].values-df_[cols_opp].values, columns=cols_diff)], axis=1)\n",
+ "df_['diff_vote12']=df_['vote1']-df_['vote2']\n",
+ "df_['diff_elo']=df_['elo1']-df_['elo2']\n",
+ "df_['diff_op']=df_['oddsprob_home']-df_['oddsprob_away']\n",
+ "df_.to_csv('data/stats_generated.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 0.251291 0.021498 \n",
+ "135577 NaN NaN \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_away_tt_avg _diff_drift_draw_tt_avg _diff_w1_tt_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 -0.030280 -0.068535 NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 NaN NaN -0.434820 \n",
+ "135576 -0.014798 0.000954 -0.732127 \n",
+ "135577 NaN NaN -0.106704 \n",
+ "135578 NaN NaN -0.499696 \n",
+ "135579 NaN NaN 0.517982 \n",
+ "\n",
+ " _diff_wx_tt_form _diff_w2_tt_form _diff_ht1_tt_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.105209 0.540029 -0.975046 \n",
+ "135576 -0.045724 0.777851 -0.318311 \n",
+ "135577 -0.020191 0.126895 0.439825 \n",
+ "135578 0.371734 0.127962 0.072410 \n",
+ "135579 -0.038012 -0.479971 0.290643 \n",
+ "\n",
+ " _diff_ht2_tt_form _diff_ft1_tt_form _diff_ft2_tt_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.089818 -1.137056 0.556169 \n",
+ "135576 1.057566 -0.682566 1.344846 \n",
+ "135577 0.204557 -0.570615 1.489799 \n",
+ "135578 -0.048456 -0.848358 0.081799 \n",
+ "135579 -0.113012 1.131725 -0.163450 \n",
+ "\n",
+ " _diff_ps_ht_tt_form _diff_ps_ft_tt_form _diff_vote1_tt_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.885228 -1.693225 -0.017424 \n",
+ "135576 -1.375877 -2.027412 0.011281 \n",
+ "135577 0.235268 -2.060413 -0.193290 \n",
+ "135578 0.120866 -0.930158 0.137652 \n",
+ "135579 0.403655 1.295175 0.226092 \n",
+ "\n",
+ " _diff_votex_tt_form _diff_vote2_tt_form _diff_elo1_tt_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 0.017237 0.000187 NaN \n",
+ "135576 -0.082597 0.071315 NaN \n",
+ "135577 -0.049694 0.242984 NaN \n",
+ "135578 0.069640 -0.207292 -71.320950 \n",
+ "135579 0.090213 -0.316305 -148.579675 \n",
+ "\n",
+ " _diff_elo2_tt_form _diff_oddsprob_home_tt_form \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 NaN NaN \n",
+ "135577 NaN -0.216268 \n",
+ "135578 37.963127 NaN \n",
+ "135579 -649.269331 NaN \n",
+ "\n",
+ " _diff_oddsprob_draw_tt_form _diff_oddsprob_away_tt_form \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 NaN NaN \n",
+ "135577 -0.046314 0.26131 \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_home_tt_form _diff_drift_away_tt_form \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 NaN NaN \n",
+ "135577 0.146267 -0.090293 \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_draw_tt_form _diff_w1_ts_avg _diff_wx_ts_avg \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN 0.000000 -1.000000 \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 NaN 0.033333 -0.033333 \n",
+ "135576 NaN -0.025510 0.035714 \n",
+ "135577 0.092841 -0.380952 -0.031746 \n",
+ "135578 NaN -0.308511 0.135880 \n",
+ "135579 NaN 0.053153 0.109910 \n",
+ "\n",
+ " _diff_w2_ts_avg _diff_ht1_ts_avg _diff_ht2_ts_avg _diff_ft1_ts_avg \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 1.000000 0.000000 1.000000 1.000000 \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "... ... ... ... ... \n",
+ "135575 0.000000 -0.008333 0.241667 -0.258333 \n",
+ "135576 -0.010204 0.054422 -0.350340 -0.311224 \n",
+ "135577 0.412698 0.095238 0.158730 -0.428571 \n",
+ "135578 0.172631 -0.386847 0.224855 -0.779739 \n",
+ "135579 -0.163063 0.318919 -0.308108 0.474775 \n",
+ "\n",
+ " _diff_ft2_ts_avg _diff_ps_ht_ts_avg _diff_ps_ft_ts_avg \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 3.000000 -1.000000 -2.000000 \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 0.116667 -0.250000 -0.375000 \n",
+ "135576 -0.511905 0.404762 0.200680 \n",
+ "135577 0.555556 -0.063492 -0.984127 \n",
+ "135578 0.517650 -0.611702 -1.297389 \n",
+ "135579 -0.350450 0.627027 0.825225 \n",
+ "\n",
+ " _diff_vote1_ts_avg _diff_votex_ts_avg _diff_vote2_ts_avg \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 0.409286 0.129286 -0.538571 \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.275966 -0.013866 0.289833 \n",
+ "135576 -0.052472 -0.055544 0.108016 \n",
+ "135577 -0.372634 -0.032947 0.405581 \n",
+ "135578 -0.373574 0.041718 0.331856 \n",
+ "135579 0.179871 -0.017555 -0.162316 \n",
+ "\n",
+ " _diff_elo1_ts_avg _diff_elo2_ts_avg _diff_oddsprob_home_ts_avg \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 73.619507 -89.104736 0.275058 \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 NaN NaN NaN \n",
+ "135576 NaN NaN NaN \n",
+ "135577 NaN NaN NaN \n",
+ "135578 -161.721936 NaN NaN \n",
+ "135579 83.769920 12.512859 NaN \n",
+ "\n",
+ " _diff_oddsprob_draw_ts_avg _diff_oddsprob_away_ts_avg \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 0.075226 -0.350284 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 NaN NaN \n",
+ "135577 NaN NaN \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_home_ts_avg _diff_drift_away_ts_avg \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 -0.015057 -0.03028 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 NaN NaN \n",
+ "135577 NaN NaN \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_draw_ts_avg _diff_w1_ts_form _diff_wx_ts_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 -0.068535 NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 NaN 0.046078 -0.031092 \n",
+ "135576 NaN -0.023498 0.030281 \n",
+ "135577 NaN 0.080592 0.316338 \n",
+ "135578 NaN 0.230921 0.305702 \n",
+ "135579 NaN 0.858974 -0.282634 \n",
+ "\n",
+ " _diff_w2_ts_form _diff_ht1_ts_form _diff_ht2_ts_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.014986 0.538585 -0.105322 \n",
+ "135576 -0.006783 0.639050 -0.519622 \n",
+ "135577 -0.396930 -0.199561 -0.451206 \n",
+ "135578 -0.536623 0.110526 -1.380921 \n",
+ "135579 -0.576340 1.009324 -0.471445 \n",
+ "\n",
+ " _diff_ft1_ts_form _diff_ft2_ts_form _diff_ps_ht_ts_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 0.464146 -0.809454 0.643908 \n",
+ "135576 -0.692345 -0.484981 1.158672 \n",
+ "135577 -0.112390 -1.254934 0.251645 \n",
+ "135578 0.492544 -0.621053 1.491447 \n",
+ "135579 1.382576 -1.661131 1.480769 \n",
+ "\n",
+ " _diff_ps_ft_ts_form _diff_vote1_ts_form _diff_votex_ts_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 1.273599 0.164299 -0.013475 \n",
+ "135576 -0.207364 0.226547 -0.040652 \n",
+ "135577 1.142544 0.057987 -0.001629 \n",
+ "135578 1.113596 -0.016601 0.041701 \n",
+ "135579 3.043706 -0.030216 -0.009669 \n",
+ "\n",
+ " _diff_vote2_ts_form _diff_elo1_ts_form _diff_elo2_ts_form \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "135575 -0.150824 NaN NaN \n",
+ "135576 -0.185894 NaN NaN \n",
+ "135577 -0.056358 NaN NaN \n",
+ "135578 -0.025100 NaN NaN \n",
+ "135579 0.039884 NaN NaN \n",
+ "\n",
+ " _diff_oddsprob_home_ts_form _diff_oddsprob_draw_ts_form \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 0.181481 0.115016 \n",
+ "135577 0.116084 0.057468 \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_oddsprob_away_ts_form _diff_drift_home_ts_form \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "... ... ... \n",
+ "135575 NaN NaN \n",
+ "135576 0.113387 -0.175625 \n",
+ "135577 0.018608 0.012968 \n",
+ "135578 NaN NaN \n",
+ "135579 NaN NaN \n",
+ "\n",
+ " _diff_drift_away_ts_form _diff_drift_draw_ts_form diff_vote12 \\\n",
+ "0 NaN NaN 0.220484 \n",
+ "1 NaN NaN 0.457031 \n",
+ "2 NaN NaN 0.550000 \n",
+ "3 NaN NaN 0.060248 \n",
+ "4 NaN NaN 0.637897 \n",
+ "... ... ... ... \n",
+ "135575 NaN NaN 0.400749 \n",
+ "135576 0.093425 -0.033440 -0.080490 \n",
+ "135577 -0.002264 0.024811 -0.552807 \n",
+ "135578 NaN NaN -0.555220 \n",
+ "135579 NaN NaN 0.517467 \n",
+ "\n",
+ " diff_elo diff_op \n",
+ "0 -15.944092 0.025605 \n",
+ "1 1.270142 0.196269 \n",
+ "2 50.959473 0.264351 \n",
+ "3 16.701172 NaN \n",
+ "4 270.143921 NaN \n",
+ "... ... ... \n",
+ "135575 NaN 0.339018 \n",
+ "135576 NaN 0.033653 \n",
+ "135577 NaN NaN \n",
+ "135578 -67.209839 NaN \n",
+ "135579 266.684692 NaN \n",
+ "\n",
+ "[135580 rows x 280 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n side \n country \n liga \n mid \n round \n ds \n t1 \n t2 \n tid1 \n tid2 \n w1 \n wx \n w2 \n ft1 \n ft2 \n winner \n odds_away \n odds_draw \n odds_home \n country_id \n round \n ds \n de \n form1 \n form2 \n vote1 \n votex \n vote2 \n pop_r \n elo1 \n elo2 \n oddsprob_home \n oddsprob_draw \n oddsprob_away \n drift_home \n drift_away \n drift_draw \n tar_w1_tt_avg \n tar_wx_tt_avg \n tar_w2_tt_avg \n tar_ht1_tt_avg \n tar_ht2_tt_avg \n tar_ft1_tt_avg \n tar_ft2_tt_avg \n tar_ps_ht_tt_avg \n tar_ps_ft_tt_avg \n tar_vote1_tt_avg \n tar_votex_tt_avg \n tar_vote2_tt_avg \n tar_elo1_tt_avg \n tar_elo2_tt_avg \n tar_oddsprob_home_tt_avg \n tar_oddsprob_draw_tt_avg \n tar_oddsprob_away_tt_avg \n tar_drift_home_tt_avg \n tar_drift_away_tt_avg \n tar_drift_draw_tt_avg \n tar_w1_tt_form \n tar_wx_tt_form \n tar_w2_tt_form \n tar_ht1_tt_form \n tar_ht2_tt_form \n tar_ft1_tt_form \n tar_ft2_tt_form \n tar_ps_ht_tt_form \n tar_ps_ft_tt_form \n tar_vote1_tt_form \n tar_votex_tt_form \n tar_vote2_tt_form \n tar_elo1_tt_form \n tar_elo2_tt_form \n tar_oddsprob_home_tt_form \n tar_oddsprob_draw_tt_form \n tar_oddsprob_away_tt_form \n tar_drift_home_tt_form \n tar_drift_away_tt_form \n tar_drift_draw_tt_form \n opp_w1_tt_avg \n opp_wx_tt_avg \n opp_w2_tt_avg \n opp_ht1_tt_avg \n opp_ht2_tt_avg \n opp_ft1_tt_avg \n opp_ft2_tt_avg \n opp_ps_ht_tt_avg \n opp_ps_ft_tt_avg \n opp_vote1_tt_avg \n opp_votex_tt_avg \n opp_vote2_tt_avg \n opp_elo1_tt_avg \n opp_elo2_tt_avg \n opp_oddsprob_home_tt_avg \n opp_oddsprob_draw_tt_avg \n opp_oddsprob_away_tt_avg \n opp_drift_home_tt_avg \n opp_drift_away_tt_avg \n opp_drift_draw_tt_avg \n opp_w1_tt_form \n opp_wx_tt_form \n opp_w2_tt_form \n opp_ht1_tt_form \n opp_ht2_tt_form \n opp_ft1_tt_form \n opp_ft2_tt_form \n opp_ps_ht_tt_form \n opp_ps_ft_tt_form \n opp_vote1_tt_form \n opp_votex_tt_form \n opp_vote2_tt_form \n opp_elo1_tt_form \n opp_elo2_tt_form \n opp_oddsprob_home_tt_form \n opp_oddsprob_draw_tt_form \n 
opp_oddsprob_away_tt_form \n opp_drift_home_tt_form \n opp_drift_away_tt_form \n opp_drift_draw_tt_form \n tar_w1_ts_avg \n tar_wx_ts_avg \n tar_w2_ts_avg \n tar_ht1_ts_avg \n tar_ht2_ts_avg \n tar_ft1_ts_avg \n tar_ft2_ts_avg \n tar_ps_ht_ts_avg \n tar_ps_ft_ts_avg \n tar_vote1_ts_avg \n tar_votex_ts_avg \n tar_vote2_ts_avg \n tar_elo1_ts_avg \n tar_elo2_ts_avg \n tar_oddsprob_home_ts_avg \n tar_oddsprob_draw_ts_avg \n tar_oddsprob_away_ts_avg \n tar_drift_home_ts_avg \n tar_drift_away_ts_avg \n tar_drift_draw_ts_avg \n tar_w1_ts_form \n tar_wx_ts_form \n tar_w2_ts_form \n tar_ht1_ts_form \n tar_ht2_ts_form \n tar_ft1_ts_form \n tar_ft2_ts_form \n tar_ps_ht_ts_form \n tar_ps_ft_ts_form \n tar_vote1_ts_form \n tar_votex_ts_form \n tar_vote2_ts_form \n tar_elo1_ts_form \n tar_elo2_ts_form \n tar_oddsprob_home_ts_form \n tar_oddsprob_draw_ts_form \n tar_oddsprob_away_ts_form \n tar_drift_home_ts_form \n tar_drift_away_ts_form \n tar_drift_draw_ts_form \n opp_w1_ts_avg \n opp_wx_ts_avg \n opp_w2_ts_avg \n opp_ht1_ts_avg \n opp_ht2_ts_avg \n opp_ft1_ts_avg \n opp_ft2_ts_avg \n opp_ps_ht_ts_avg \n opp_ps_ft_ts_avg \n opp_vote1_ts_avg \n opp_votex_ts_avg \n opp_vote2_ts_avg \n opp_elo1_ts_avg \n opp_elo2_ts_avg \n opp_oddsprob_home_ts_avg \n opp_oddsprob_draw_ts_avg \n opp_oddsprob_away_ts_avg \n opp_drift_home_ts_avg \n opp_drift_away_ts_avg \n opp_drift_draw_ts_avg \n opp_w1_ts_form \n opp_wx_ts_form \n opp_w2_ts_form \n opp_ht1_ts_form \n opp_ht2_ts_form \n opp_ft1_ts_form \n opp_ft2_ts_form \n opp_ps_ht_ts_form \n opp_ps_ft_ts_form \n opp_vote1_ts_form \n opp_votex_ts_form \n opp_vote2_ts_form \n opp_elo1_ts_form \n opp_elo2_ts_form \n opp_oddsprob_home_ts_form \n opp_oddsprob_draw_ts_form \n opp_oddsprob_away_ts_form \n opp_drift_home_ts_form \n opp_drift_away_ts_form \n opp_drift_draw_ts_form \n _diff_w1_tt_avg \n _diff_wx_tt_avg \n _diff_w2_tt_avg \n _diff_ht1_tt_avg \n _diff_ht2_tt_avg \n _diff_ft1_tt_avg \n _diff_ft2_tt_avg \n _diff_ps_ht_tt_avg \n 
_diff_ps_ft_tt_avg \n _diff_vote1_tt_avg \n _diff_votex_tt_avg \n _diff_vote2_tt_avg \n _diff_elo1_tt_avg \n _diff_elo2_tt_avg \n _diff_oddsprob_home_tt_avg \n _diff_oddsprob_draw_tt_avg \n _diff_oddsprob_away_tt_avg \n _diff_drift_home_tt_avg \n _diff_drift_away_tt_avg \n _diff_drift_draw_tt_avg \n _diff_w1_tt_form \n _diff_wx_tt_form \n _diff_w2_tt_form \n _diff_ht1_tt_form \n _diff_ht2_tt_form \n _diff_ft1_tt_form \n _diff_ft2_tt_form \n _diff_ps_ht_tt_form \n _diff_ps_ft_tt_form \n _diff_vote1_tt_form \n _diff_votex_tt_form \n _diff_vote2_tt_form \n _diff_elo1_tt_form \n _diff_elo2_tt_form \n _diff_oddsprob_home_tt_form \n _diff_oddsprob_draw_tt_form \n _diff_oddsprob_away_tt_form \n _diff_drift_home_tt_form \n _diff_drift_away_tt_form \n _diff_drift_draw_tt_form \n _diff_w1_ts_avg \n _diff_wx_ts_avg \n _diff_w2_ts_avg \n _diff_ht1_ts_avg \n _diff_ht2_ts_avg \n _diff_ft1_ts_avg \n _diff_ft2_ts_avg \n _diff_ps_ht_ts_avg \n _diff_ps_ft_ts_avg \n _diff_vote1_ts_avg \n _diff_votex_ts_avg \n _diff_vote2_ts_avg \n _diff_elo1_ts_avg \n _diff_elo2_ts_avg \n _diff_oddsprob_home_ts_avg \n _diff_oddsprob_draw_ts_avg \n _diff_oddsprob_away_ts_avg \n _diff_drift_home_ts_avg \n _diff_drift_away_ts_avg \n _diff_drift_draw_ts_avg \n _diff_w1_ts_form \n _diff_wx_ts_form \n _diff_w2_ts_form \n _diff_ht1_ts_form \n _diff_ht2_ts_form \n _diff_ft1_ts_form \n _diff_ft2_ts_form \n _diff_ps_ht_ts_form \n _diff_ps_ft_ts_form \n _diff_vote1_ts_form \n _diff_votex_ts_form \n _diff_vote2_ts_form \n _diff_elo1_ts_form \n _diff_elo2_ts_form \n _diff_oddsprob_home_ts_form \n _diff_oddsprob_draw_ts_form \n _diff_oddsprob_away_ts_form \n _diff_drift_home_ts_form \n _diff_drift_away_ts_form \n _diff_drift_draw_ts_form \n diff_vote12 \n diff_elo \n diff_op \n \n \n \n \n 0 \n 1 \n england \n championship \n 5583876 \n 25 \n 2015-01-10 12:15:00+00:00 \n ipswich town \n derby county \n 0 \n 86 \n 0 \n 0 \n 1 \n 0.0 \n 1.0 \n away \n 2.82 \n 3.26 \n 2.62 \n 15 \n 25 \n 2015-01-10 12:15:00+00:00 \n 
2015-01-10 \n 20 \n 18 \n 0.492176 \n 0.236131 \n 0.271693 \n 2 \n 1583.313477 \n 1599.257568 \n 0.366549 \n 0.292508 \n 0.340944 \n -0.004571 \n 0.011964 \n -0.011880 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.130584 \n 0.065292 \n 0.065292 \n -0.659794 \n -0.412371 \n -1.161512 \n -0.467354 \n -0.247423 \n -0.694158 \n 0.048694 \n 0.057572 \n -0.106265 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.220484 \n 
-15.944092 \n 0.025605 \n \n \n 1 \n 1 \n france \n ligue-2 \n 5510536 \n 19 \n 2015-01-10 13:00:00+00:00 \n troyes \n stade brestois 29 \n 1 \n 79 \n 1 \n 0 \n 0 \n 1.0 \n 0.0 \n home \n 3.83 \n 3.06 \n 2.13 \n 19 \n 19 \n 2015-01-10 13:00:00+00:00 \n 2015-01-10 \n 20 \n 20 \n 0.578125 \n 0.300781 \n 0.121094 \n 1 \n 1529.688232 \n 1528.418091 \n 0.445430 \n 0.305408 \n 0.249161 \n 0.057560 \n -0.065458 \n -0.044807 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.291667 \n 0.000000 \n -0.291667 \n -0.208333 \n -0.500000 \n 0.125000 \n -0.833333 \n 0.291667 \n 0.958333 \n 0.056494 \n 0.027412 \n -0.083906 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN 
\n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.457031 \n 1.270142 \n 0.196269 \n \n \n 2 \n 1 \n greece \n super-league \n 6570345 \n 11 \n 2015-01-10 13:00:00+00:00 \n veria nfc \n ael kalloni \n 2 \n 67 \n 0 \n 1 \n 0 \n 1.0 \n 1.0 \n draw \n 4.50 \n 3.31 \n 1.87 \n 21 \n 11 \n 2015-01-10 13:00:00+00:00 \n 2015-01-10 \n 14 \n 14 \n 0.678571 \n 0.192857 \n 0.128571 \n 0 \n 1374.405029 \n 1323.445557 \n 0.488185 \n 0.287981 \n 0.223834 \n 0.014080 \n -0.001601 \n -0.031182 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 3.000000 \n -1.000000 \n -2.000000 \n 0.535000 \n 0.295000 \n 0.170000 \n 1390.727661 \n 1373.906616 \n 0.394703 \n 0.305642 \n 0.299655 \n 0.062555 \n -0.061729 \n -0.032193 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.125714 \n 0.165714 \n 0.708571 \n 1317.108154 \n 1463.011353 \n 0.119645 \n 0.230416 \n 0.649939 \n 0.077612 \n -0.031449 \n 0.036342 \n 0.296296 \n -0.203704 \n -0.092593 \n 0.259259 \n -0.240741 \n 0.240741 \n -0.092593 \n 0.500000 \n 0.333333 \n 0.057190 \n 0.045852 \n -0.103043 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 3.000000 \n -1.000000 \n -2.000000 \n 0.535000 \n 0.295000 \n 0.170000 \n 1390.727661 \n 1373.906616 \n 0.394703 \n 0.305642 \n 0.299655 \n 0.062555 \n -0.061729 \n -0.032193 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.125714 \n 0.165714 \n 0.708571 \n 1317.108154 \n 
1463.011353 \n 0.119645 \n 0.230416 \n 0.649939 \n 0.077612 \n -0.031449 \n 0.036342 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n -1.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 3.000000 \n -1.000000 \n -2.000000 \n 0.409286 \n 0.129286 \n -0.538571 \n 73.619507 \n -89.104736 \n 0.275058 \n 0.075226 \n -0.350284 \n -0.015057 \n -0.030280 \n -0.068535 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n -1.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 3.000000 \n -1.000000 \n -2.000000 \n 0.409286 \n 0.129286 \n -0.538571 \n 73.619507 \n -89.104736 \n 0.275058 \n 0.075226 \n -0.350284 \n -0.015057 \n -0.03028 \n -0.068535 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.550000 \n 50.959473 \n 0.264351 \n \n \n 3 \n 1 \n england \n premier-league \n 5582834 \n 21 \n 2015-01-10 15:00:00+00:00 \n burnley \n queens park rangers \n 3 \n 442 \n 1 \n 0 \n 0 \n 2.0 \n 1.0 \n home \n NaN \n NaN \n NaN \n 15 \n 21 \n 2015-01-10 15:00:00+00:00 \n 2015-01-10 \n 19 \n 19 \n 0.413851 \n 0.232545 \n 0.353604 \n 3 \n 1593.554565 \n 1576.853394 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 2.000000 \n 3.000000 \n 3.000000 \n -1.000000 \n 0.000000 \n 0.094077 \n 0.146839 \n 0.759084 \n 1589.988403 \n 1664.702393 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 1.000000 \n 0.000000 \n 0.294419 \n 0.212984 \n 0.492597 \n 1575.451904 \n 1679.351807 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 
0.198925 \n 0.204301 \n -0.403226 \n 1.403226 \n -0.419355 \n 1.580645 \n -1.193548 \n 1.822581 \n 2.774194 \n 0.109107 \n 0.020044 \n -0.129151 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n NaN \n NaN \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 2.000000 \n 2.000000 \n -2.000000 \n 0.000000 \n -0.200342 \n -0.066145 \n 0.266487 \n 14.536499 \n -14.649414 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.060248 \n 16.701172 \n NaN \n \n \n 4 \n 1 \n england \n premier-league \n 5582836 \n 21 \n 2015-01-10 15:00:00+00:00 \n chelsea \n newcastle united \n 4 \n 105 \n 1 \n 0 \n 0 \n 2.0 \n 0.0 \n home \n NaN \n NaN \n NaN \n 15 \n 21 \n 2015-01-10 15:00:00+00:00 \n 2015-01-10 \n 14 \n 20 \n 0.771883 \n 0.094132 \n 0.133985 \n 4 \n 1931.280029 \n 1661.136108 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 0.000000 \n 1.000000 \n 1.000000 \n 3.000000 \n 3.000000 \n 5.000000 \n -2.000000 \n 
-2.000000 \n 0.723240 \n 0.122801 \n 0.153959 \n 1947.358643 \n 1763.849609 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n 1.000000 \n 0.000000 \n 2.000000 \n 1.000000 \n 3.000000 \n 3.000000 \n 1.000000 \n 0.000000 \n 0.759084 \n 0.146839 \n 0.094077 \n 1664.702393 \n 1589.988403 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.188889 \n -0.322222 \n 0.133333 \n -0.233333 \n -0.011111 \n -0.288889 \n -0.055556 \n -0.222222 \n -0.233333 \n -0.030202 \n 0.061090 \n -0.030889 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.000000 \n -1.000000 \n 1.000000 \n -1.000000 \n 2.000000 \n 0.000000 \n 2.000000 \n -3.000000 \n -2.000000 \n -0.035844 \n -0.024039 \n 0.059882 \n 282.656250 \n 173.861206 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.637897 \n 270.143921 \n NaN \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... 
\n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... 
\n \n \n 135575 \n 0 \n portugal \n segunda-liga \n 8956894 \n 15 \n 2021-01-10 15:00:00+00:00 \n feirense \n ud oliveirense \n 280 \n 283 \n 0 \n 0 \n 1 \n 0.0 \n 1.0 \n home \n 5.21 \n 3.22 \n 1.76 \n 33 \n 15 \n 2021-01-10 15:00:00+00:00 \n 2021-01-10 \n 18 \n 18 \n 0.561049 \n 0.278652 \n 0.160300 \n 0 \n NaN \n NaN \n 0.524056 \n 0.290905 \n 0.185039 \n 0.040916 \n -0.106108 \n -0.024004 \n 0.300000 \n 0.208333 \n 0.491667 \n 0.425000 \n 0.650000 \n 0.966667 \n 1.416667 \n -0.225000 \n -0.450000 \n 0.255649 \n 0.266064 \n 0.478287 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.303867 \n -0.014733 \n 0.318600 \n -0.491713 \n 0.119705 \n -1.044199 \n 0.222836 \n -0.611418 \n -1.267035 \n -0.113440 \n 0.019023 \n 0.094417 \n NaN \n NaN \n 0.065688 \n 0.127490 \n 0.276435 \n 0.042375 \n -0.046141 \n -0.006691 \n 0.150000 \n 0.250000 \n 0.600000 \n 0.350000 \n 0.450000 \n 0.800000 \n 1.650000 \n -0.100000 \n -0.850000 \n 0.236938 \n 0.275455 \n 0.487607 \n NaN \n NaN \n 0.011662 \n 0.014126 \n 0.024213 \n 0.002563 \n -0.001498 \n -0.000077 \n 0.130952 \n 0.090476 \n -0.221429 \n 0.483333 \n 0.209524 \n 0.092857 \n -0.333333 \n 0.273810 \n 0.426190 \n -0.096016 \n 0.001786 \n 0.094230 \n 220.859196 \n 311.942668 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.283333 \n 0.216667 \n 0.500000 \n 0.366667 \n 0.616667 \n 0.866667 \n 1.366667 \n -0.250000 \n -0.500000 \n 0.155670 \n 0.258575 \n 0.585755 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.183333 \n -0.300000 \n 0.116667 \n -0.091667 \n -0.133333 \n 0.100000 \n -0.341667 \n 0.041667 \n 0.441667 \n 0.126143 \n 0.009673 \n -0.135816 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.250000 \n 0.250000 \n 0.500000 \n 0.375000 \n 0.375000 \n 1.125000 \n 1.250000 \n 0.000000 \n -0.125000 \n 0.431636 \n 0.272441 \n 0.295922 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.137255 \n -0.268908 \n 0.131653 \n -0.630252 \n -0.028011 \n -0.364146 \n 0.467787 \n -0.602241 \n 
-0.831933 \n -0.038156 \n 0.023148 \n 0.015008 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.150000 \n -0.041667 \n -0.108333 \n 0.075000 \n 0.200000 \n 0.166667 \n -0.233333 \n -0.125000 \n 0.400000 \n 0.018711 \n -0.009391 \n -0.009320 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.434820 \n -0.105209 \n 0.540029 \n -0.975046 \n -0.089818 \n -1.137056 \n 0.556169 \n -0.885228 \n -1.693225 \n -0.017424 \n 0.017237 \n 0.000187 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.033333 \n -0.033333 \n 0.000000 \n -0.008333 \n 0.241667 \n -0.258333 \n 0.116667 \n -0.250000 \n -0.375000 \n -0.275966 \n -0.013866 \n 0.289833 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.046078 \n -0.031092 \n -0.014986 \n 0.538585 \n -0.105322 \n 0.464146 \n -0.809454 \n 0.643908 \n 1.273599 \n 0.164299 \n -0.013475 \n -0.150824 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.400749 \n NaN \n 0.339018 \n \n \n 135576 \n 0 \n portugal \n segunda-liga \n 8956856 \n 15 \n 2021-01-10 17:00:00+00:00 \n arouca \n casa pia \n 39 \n 1647 \n 0 \n 1 \n 0 \n 1.0 \n 1.0 \n draw \n 2.83 \n 3.09 \n 2.56 \n 33 \n 15 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 20 \n 20 \n 0.258530 \n 0.402450 \n 0.339020 \n 0 \n NaN \n NaN \n 0.364988 \n 0.303677 \n 0.331335 \n 0.218997 \n -0.349433 \n -0.073865 \n 0.303030 \n 0.262626 \n 0.434343 \n 0.505051 \n 0.656566 \n 1.080808 \n 1.393939 \n -0.151515 \n -0.313131 \n 0.302763 \n 0.235111 \n 0.462126 \n NaN \n NaN \n 0.169301 \n 0.166018 \n 0.270741 \n 0.019995 \n -0.013074 \n -0.000682 \n -0.047917 \n -0.256250 \n 0.304167 \n 0.172917 \n 0.864583 \n -0.156250 \n 0.239583 \n -0.691667 \n -0.395833 \n 0.043580 \n 0.019094 \n -0.062674 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.285714 \n 0.333333 \n 0.380952 \n 0.380952 \n 0.857143 \n 1.285714 \n 1.809524 \n -0.476190 \n -0.523810 \n 0.174373 \n 0.280566 \n 0.545060 \n NaN \n NaN \n 0.013056 \n 0.015113 \n 0.019450 \n -0.001503 
\n 0.001724 \n -0.001636 \n 0.684211 \n -0.210526 \n -0.473684 \n 0.491228 \n -0.192982 \n 0.526316 \n -1.105263 \n 0.684211 \n 1.631579 \n 0.032299 \n 0.101691 \n -0.133990 \n -31.964004 \n -77.311984 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.224490 \n 0.285714 \n 0.489796 \n 0.387755 \n 0.816327 \n 0.938776 \n 1.571429 \n -0.428571 \n -0.632653 \n 0.177217 \n 0.234965 \n 0.587818 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.341085 \n -0.209302 \n -0.131783 \n 0.201550 \n -0.488372 \n 0.286822 \n -0.922481 \n 0.689922 \n 1.209302 \n 0.041976 \n 0.008832 \n -0.050808 \n NaN \n NaN \n 0.080407 \n 0.056260 \n 0.064884 \n -0.187617 \n 0.113067 \n -0.029840 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.333333 \n 1.166667 \n 1.250000 \n 2.083333 \n -0.833333 \n -0.833333 \n 0.229689 \n 0.290508 \n 0.479802 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.364583 \n -0.239583 \n -0.125000 \n -0.437500 \n 0.031250 \n 0.979167 \n -0.437500 \n -0.468750 \n 1.416667 \n -0.184571 \n 0.049484 \n 0.135086 \n NaN \n NaN \n -0.101074 \n -0.058756 \n -0.048503 \n -0.011992 \n 0.019642 \n 0.003600 \n 0.017316 \n -0.070707 \n 0.053391 \n 0.124098 \n -0.200577 \n -0.204906 \n -0.415584 \n 0.324675 \n 0.210678 \n 0.128389 \n -0.045455 \n -0.082935 \n NaN \n NaN \n 0.156245 \n 0.150906 \n 0.251291 \n 0.021498 \n -0.014798 \n 0.000954 \n -0.732127 \n -0.045724 \n 0.777851 \n -0.318311 \n 1.057566 \n -0.682566 \n 1.344846 \n -1.375877 \n -2.027412 \n 0.011281 \n -0.082597 \n 0.071315 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.025510 \n 0.035714 \n -0.010204 \n 0.054422 \n -0.350340 \n -0.311224 \n -0.511905 \n 0.404762 \n 0.200680 \n -0.052472 \n -0.055544 \n 0.108016 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.023498 \n 0.030281 \n -0.006783 \n 0.639050 \n -0.519622 \n -0.692345 \n -0.484981 \n 1.158672 \n -0.207364 \n 0.226547 \n -0.040652 \n -0.185894 \n NaN \n NaN \n 0.181481 \n 0.115016 \n 0.113387 \n -0.175625 \n 
0.093425 \n -0.033440 \n -0.080490 \n NaN \n 0.033653 \n \n \n 135577 \n 0 \n portugal \n segunda-liga \n 8956724 \n 15 \n 2021-01-10 17:00:00+00:00 \n cd cova da piedade \n fc vizela \n 1293 \n 1748 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n home \n NaN \n NaN \n NaN \n 33 \n 15 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 20 \n 18 \n 0.080214 \n 0.286765 \n 0.633021 \n 0 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.272727 \n 0.227273 \n 0.500000 \n 0.363636 \n 0.636364 \n 1.000000 \n 1.454545 \n -0.272727 \n -0.454545 \n 0.230233 \n 0.298016 \n 0.471752 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.054902 \n -0.182353 \n 0.237255 \n 0.207843 \n 0.078431 \n -0.176471 \n 1.735294 \n 0.129412 \n -1.911765 \n -0.182408 \n -0.012041 \n 0.194449 \n NaN \n NaN \n -0.014683 \n 0.075283 \n 0.357047 \n 0.152878 \n -0.147571 \n 0.088309 \n 0.428571 \n 0.285714 \n 0.285714 \n 0.357143 \n 0.571429 \n 1.285714 \n 1.428571 \n -0.214286 \n -0.142857 \n 0.321170 \n 0.290489 \n 0.388341 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.051802 \n -0.162162 \n 0.110360 \n -0.231982 \n -0.126126 \n 0.394144 \n 0.245495 \n -0.105856 \n 0.148649 \n 0.010882 \n 0.037653 \n -0.048535 \n 276.877122 \n -305.888866 \n 0.201585 \n 0.121597 \n 0.095737 \n 0.006611 \n -0.057278 \n -0.004532 \n 0.333333 \n 0.111111 \n 0.555556 \n 0.666667 \n 0.444444 \n 1.000000 \n 1.555556 \n 0.222222 \n -0.555556 \n 0.108789 \n 0.279229 \n 0.611983 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.070175 \n 0.035088 \n -0.105263 \n -0.157895 \n -0.315789 \n -0.122807 \n -0.807018 \n 0.157895 \n 0.684211 \n 0.081109 \n 0.043599 \n -0.124708 \n NaN \n NaN \n 0.054097 \n -0.019669 \n -0.087060 \n -0.030699 \n 0.053728 \n 0.016986 \n 0.714286 \n 0.142857 \n 0.142857 \n 0.571429 \n 0.285714 \n 1.428571 \n 1.000000 \n 0.285714 \n 0.428571 \n 0.481423 \n 0.312175 \n 0.206401 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.010417 \n -0.281250 \n 0.291667 \n 
0.041667 \n 0.135417 \n -0.010417 \n 0.447917 \n -0.093750 \n -0.458333 \n 0.023122 \n 0.045228 \n -0.068350 \n NaN \n NaN \n -0.061987 \n -0.077137 \n -0.105667 \n -0.043667 \n 0.055992 \n -0.007825 \n -0.155844 \n -0.058442 \n 0.214286 \n 0.006494 \n 0.064935 \n -0.285714 \n 0.025974 \n -0.058442 \n -0.311688 \n -0.090937 \n 0.007526 \n 0.083411 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.106704 \n -0.020191 \n 0.126895 \n 0.439825 \n 0.204557 \n -0.570615 \n 1.489799 \n 0.235268 \n -2.060413 \n -0.193290 \n -0.049694 \n 0.242984 \n NaN \n NaN \n -0.216268 \n -0.046314 \n 0.26131 \n 0.146267 \n -0.090293 \n 0.092841 \n -0.380952 \n -0.031746 \n 0.412698 \n 0.095238 \n 0.158730 \n -0.428571 \n 0.555556 \n -0.063492 \n -0.984127 \n -0.372634 \n -0.032947 \n 0.405581 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.080592 \n 0.316338 \n -0.396930 \n -0.199561 \n -0.451206 \n -0.112390 \n -1.254934 \n 0.251645 \n 1.142544 \n 0.057987 \n -0.001629 \n -0.056358 \n NaN \n NaN \n 0.116084 \n 0.057468 \n 0.018608 \n 0.012968 \n -0.002264 \n 0.024811 \n -0.552807 \n NaN \n NaN \n \n \n 135578 \n 0 \n romania \n liga-i \n 9270007 \n 12 \n 2021-01-10 17:00:00+00:00 \n fc hermannstadt \n fc viitorul constanta \n 1499 \n 594 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n home \n NaN \n NaN \n NaN \n 34 \n 12 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 14 \n 18 \n 0.068589 \n 0.307603 \n 0.623808 \n 0 \n 1285.478027 \n 1352.687866 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.273684 \n 0.315789 \n 0.410526 \n 0.431579 \n 0.515789 \n 1.021053 \n 1.357895 \n -0.084211 \n -0.336842 \n 0.262723 \n 0.286901 \n 0.450376 \n 1238.260056 \n 1280.394616 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.418994 \n 0.087523 \n 0.331471 \n -0.264432 \n -0.230912 \n -0.774674 \n 0.109870 \n -0.033520 \n -0.884544 \n 0.072367 \n 0.087135 \n -0.159502 \n -36.716250 \n -57.582531 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.418994 \n 0.245810 \n 0.335196 \n 0.597765 \n 0.564246 
\n 1.441341 \n 1.223464 \n 0.033520 \n 0.217877 \n 0.421861 \n 0.246587 \n 0.331553 \n 1405.488915 \n 1350.262910 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.080702 \n -0.284211 \n 0.203509 \n -0.336842 \n -0.182456 \n 0.073684 \n 0.028070 \n -0.154386 \n 0.045614 \n -0.065284 \n 0.017495 \n 0.047789 \n 34.604699 \n -95.545658 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.191489 \n 0.340426 \n 0.468085 \n 0.340426 \n 0.702128 \n 0.936170 \n 1.574468 \n -0.361702 \n -0.638298 \n 0.144103 \n 0.278918 \n 0.576980 \n 1251.697931 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.231579 \n 0.080702 \n 0.150877 \n 0.277193 \n -0.101754 \n 0.217544 \n 0.378947 \n 0.378947 \n -0.161404 \n 0.006521 \n 0.064241 \n -0.070762 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.500000 \n 0.204545 \n 0.295455 \n 0.727273 \n 0.477273 \n 1.715909 \n 1.056818 \n 0.250000 \n 0.659091 \n 0.517677 \n 0.237200 \n 0.245123 \n 1413.419866 \n 1351.429943 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.462500 \n -0.225000 \n 0.687500 \n 0.166667 \n 1.279167 \n -0.275000 \n 1.000000 \n -1.112500 \n -1.275000 \n 0.023122 \n 0.022540 \n -0.045662 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.145310 \n 0.069979 \n 0.075331 \n -0.166186 \n -0.048456 \n -0.420288 \n 0.134431 \n -0.117730 \n -0.554719 \n -0.159138 \n 0.040315 \n 0.118823 \n -167.228859 \n -69.868294 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.499696 \n 0.371734 \n 0.127962 \n 0.072410 \n -0.048456 \n -0.848358 \n 0.081799 \n 0.120866 \n -0.930158 \n 0.137652 \n 0.069640 \n -0.207292 \n -71.320950 \n 37.963127 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.308511 \n 0.135880 \n 0.172631 \n -0.386847 \n 0.224855 \n -0.779739 \n 0.517650 \n -0.611702 \n -1.297389 \n -0.373574 \n 0.041718 \n 0.331856 \n -161.721936 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.230921 \n 0.305702 \n -0.536623 \n 0.110526 \n -1.380921 \n 0.492544 \n -0.621053 \n 1.491447 \n 1.113596 \n -0.016601 \n 0.041701 
\n -0.025100 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.555220 \n -67.209839 \n NaN \n \n \n 135579 \n 0 \n scotland \n premiership \n 8736289 \n 23 \n 2021-01-10 15:00:00+00:00 \n rangers \n aberdeen \n 306 \n 685 \n 1 \n 0 \n 0 \n 2.0 \n 1.0 \n away \n NaN \n NaN \n NaN \n 36 \n 23 \n 2021-01-10 15:00:00+00:00 \n 2021-01-10 \n 18 \n 5 \n 0.647623 \n 0.222222 \n 0.130155 \n 2 \n 1699.736084 \n 1433.051392 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.525316 \n 0.215190 \n 0.259494 \n 0.879747 \n 0.518987 \n 1.784810 \n 1.151899 \n 0.360759 \n 0.632911 \n 0.568873 \n 0.194163 \n 0.236964 \n 1054.465973 \n 982.701836 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.059649 \n 0.017544 \n -0.077193 \n 0.235088 \n 0.150877 \n 0.645614 \n 0.308772 \n 0.084211 \n 0.336842 \n 0.120290 \n 0.096313 \n -0.216603 \n 57.140579 \n -452.607466 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.480000 \n 0.133333 \n 0.386667 \n 0.666667 \n 0.693333 \n 1.333333 \n 1.320000 \n -0.026667 \n 0.013333 \n 0.440807 \n 0.215922 \n 0.343271 \n 1118.670773 \n 1048.877127 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.458333 \n 0.055556 \n 0.402778 \n -0.055556 \n 0.263889 \n -0.486111 \n 0.472222 \n -0.319444 \n -0.958333 \n -0.105802 \n 0.006100 \n 0.099702 \n 205.720254 \n 196.661865 \n -0.137746 \n -0.058806 \n -0.011781 \n 0.015073 \n 0.014580 \n -0.000370 \n 0.486486 \n 0.243243 \n 0.270270 \n 0.918919 \n 0.558559 \n 1.774775 \n 1.216216 \n 0.360360 \n 0.558559 \n 0.564439 \n 0.197249 \n 0.238312 \n 1114.002228 \n 1024.520863 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.692308 \n -0.153846 \n -0.538462 \n 0.282051 \n -0.282051 \n 1.333333 \n -1.051282 \n 0.564103 \n 2.384615 \n 0.086793 \n 0.021681 \n -0.108474 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.433333 \n 0.133333 \n 0.433333 \n 0.600000 \n 0.866667 \n 1.300000 \n 1.566667 \n -0.266667 \n -0.266667 \n 0.384568 \n 0.214804 \n 0.400628 \n 1030.232308 \n 1012.008004 \n 0.162159 \n 0.124672 \n 
0.213169 \n -0.009556 \n 0.014760 \n 0.011214 \n -0.166667 \n 0.128788 \n 0.037879 \n -0.727273 \n 0.189394 \n -0.049242 \n 0.609848 \n -0.916667 \n -0.659091 \n 0.117008 \n 0.031350 \n -0.148358 \n -28.743312 \n -466.501557 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.045316 \n 0.081857 \n -0.127173 \n 0.213080 \n -0.174346 \n 0.451477 \n -0.168101 \n 0.387426 \n 0.619578 \n 0.128066 \n -0.021759 \n -0.106308 \n -64.204800 \n -66.175292 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.517982 \n -0.038012 \n -0.479971 \n 0.290643 \n -0.113012 \n 1.131725 \n -0.163450 \n 0.403655 \n 1.295175 \n 0.226092 \n 0.090213 \n -0.316305 \n -148.579675 \n -649.269331 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.053153 \n 0.109910 \n -0.163063 \n 0.318919 \n -0.308108 \n 0.474775 \n -0.350450 \n 0.627027 \n 0.825225 \n 0.179871 \n -0.017555 \n -0.162316 \n 83.769920 \n 12.512859 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.858974 \n -0.282634 \n -0.576340 \n 1.009324 \n -0.471445 \n 1.382576 \n -1.661131 \n 1.480769 \n 3.043706 \n -0.030216 \n -0.009669 \n 0.039884 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.517467 \n 266.684692 \n NaN \n \n \n
\n
135580 rows × 280 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 43
+ }
+ ],
+ "source": [
+ "df_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " n\n",
+ "_diff_oddsprob_home_tt_form 106514\n",
+ "_diff_oddsprob_draw_tt_form 106514\n",
+ "_diff_oddsprob_away_tt_form 106514\n",
+ "_diff_drift_home_tt_form 106514\n",
+ "_diff_drift_away_tt_form 106514\n",
+ "_diff_drift_draw_tt_form 106514"
+ ],
+ "text/html": "
\n\n
\n \n \n \n n \n \n \n \n \n _diff_oddsprob_home_tt_form \n 106514 \n \n \n _diff_oddsprob_draw_tt_form \n 106514 \n \n \n _diff_oddsprob_away_tt_form \n 106514 \n \n \n _diff_drift_home_tt_form \n 106514 \n \n \n _diff_drift_away_tt_form \n 106514 \n \n \n _diff_drift_draw_tt_form \n 106514 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 42
+ }
+ ],
+ "source": [
+ "nulls=pd.DataFrame(df_.isna().sum(), columns=['n'])\n",
+ "nulls[nulls.n>100000]"
+ ]
+ },
+ {
+ "source": [
+ "# No all\n",
+ "df_1=df.copy()[COL_INF+COL_CUR]\n",
+ "df_1=df_1.merge(df_home[df_home.columns[1:]], left_on=['mid','tid1'], right_on=['mid','tid'], how='left')\n",
+ "df_1=df_1.merge(df_away[df_away.columns[1:]], left_on=['mid','tid2'], right_on=['mid','tid'], how='left')\n",
+ "df_1.drop(columns=['tid_x','tid_y'], inplace=True)\n",
+ "cols_tar=[x for x in df_1.columns if '_th_' in x]\n",
+ "cols_opp=[x for x in df_1.columns if '_ta_' in x]\n",
+ "cols_diff=[x.replace('_th_','_diff_') for x in cols_tar]\n",
+ "df_1.reset_index(drop=True, inplace=True)\n",
+ "df_1=pd.concat([df_1,pd.DataFrame(df_1[cols_tar].values-df_1[cols_opp].values, columns=cols_diff)], axis=1)\n",
+ "df_1['diff_vote12']=df_1['vote1']-df_1['vote2']\n",
+ "df_1['diff_elo']=df_1['elo1']-df_1['elo2']\n",
+ "df_1['diff_op']=df_1['oddsprob_home']-df_1['oddsprob_away']"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " country liga mid round ds \\\n67788 romania liga-i 9270007 12 2021-01-10 17:00:00+00:00 \n135578 romania liga-i 9270007 12 2021-01-10 17:00:00+00:00 \n\n t1 t2 tid1 tid2 w1 wx w2 \\\n67788 fc viitorul constanta fc hermannstadt 594 1499 1 0 0 \n135578 fc hermannstadt fc viitorul constanta 1499 594 0 0 1 \n\n ft1 ft2 winner odds_away odds_draw odds_home country_id round \\\n67788 2.0 1.0 home NaN NaN NaN 34 12 \n135578 1.0 2.0 home NaN NaN NaN 34 12 \n\n ds de form1 form2 vote1 \\\n67788 2021-01-10 17:00:00+00:00 2021-01-10 18 14 0.623808 \n135578 2021-01-10 17:00:00+00:00 2021-01-10 14 18 0.068589 \n\n votex vote2 pop_r elo1 elo2 oddsprob_home \\\n67788 0.307603 0.068589 0 1352.687866 1285.478027 NaN \n135578 0.307603 0.623808 0 1285.478027 1352.687866 NaN \n\n oddsprob_draw oddsprob_away drift_home drift_away drift_draw \\\n67788 NaN NaN NaN NaN NaN \n135578 NaN NaN NaN NaN NaN \n\n tar_w1_tt_avg tar_wx_tt_avg tar_w2_tt_avg tar_ht1_tt_avg \\\n67788 0.418994 0.245810 0.335196 0.597765 \n135578 0.273684 0.315789 0.410526 0.431579 \n\n tar_ht2_tt_avg tar_ft1_tt_avg tar_ft2_tt_avg tar_ps_ht_tt_avg \\\n67788 0.564246 1.441341 1.223464 0.033520 \n135578 0.515789 1.021053 1.357895 -0.084211 \n\n tar_ps_ft_tt_avg tar_vote1_tt_avg tar_votex_tt_avg \\\n67788 0.217877 0.421861 0.246587 \n135578 -0.336842 0.262723 0.286901 \n\n tar_vote2_tt_avg tar_elo1_tt_avg tar_elo2_tt_avg \\\n67788 0.331553 1405.488915 1350.262910 \n135578 0.450376 1238.260056 1280.394616 \n\n tar_oddsprob_home_tt_avg tar_oddsprob_draw_tt_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n tar_oddsprob_away_tt_avg tar_drift_home_tt_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n tar_drift_away_tt_avg tar_drift_draw_tt_avg opp_w1_tt_avg \\\n67788 NaN NaN 0.273684 \n135578 NaN NaN 0.418994 \n\n opp_wx_tt_avg opp_w2_tt_avg opp_ht1_tt_avg opp_ht2_tt_avg \\\n67788 0.315789 0.410526 0.431579 0.515789 \n135578 0.245810 0.335196 0.597765 0.564246 \n\n opp_ft1_tt_avg opp_ft2_tt_avg opp_ps_ht_tt_avg 
opp_ps_ft_tt_avg \\\n67788 1.021053 1.357895 -0.084211 -0.336842 \n135578 1.441341 1.223464 0.033520 0.217877 \n\n opp_vote1_tt_avg opp_votex_tt_avg opp_vote2_tt_avg opp_elo1_tt_avg \\\n67788 0.262723 0.286901 0.450376 1238.260056 \n135578 0.421861 0.246587 0.331553 1405.488915 \n\n opp_elo2_tt_avg opp_oddsprob_home_tt_avg opp_oddsprob_draw_tt_avg \\\n67788 1280.394616 NaN NaN \n135578 1350.262910 NaN NaN \n\n opp_oddsprob_away_tt_avg opp_drift_home_tt_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n opp_drift_away_tt_avg opp_drift_draw_tt_avg tar_w1_ts_avg \\\n67788 NaN NaN 0.500000 \n135578 NaN NaN 0.191489 \n\n tar_wx_ts_avg tar_w2_ts_avg tar_ht1_ts_avg tar_ht2_ts_avg \\\n67788 0.204545 0.295455 0.727273 0.477273 \n135578 0.340426 0.468085 0.340426 0.702128 \n\n tar_ft1_ts_avg tar_ft2_ts_avg tar_ps_ht_ts_avg tar_ps_ft_ts_avg \\\n67788 1.715909 1.056818 0.250000 0.659091 \n135578 0.936170 1.574468 -0.361702 -0.638298 \n\n tar_vote1_ts_avg tar_votex_ts_avg tar_vote2_ts_avg tar_elo1_ts_avg \\\n67788 0.517677 0.237200 0.245123 1413.419866 \n135578 0.144103 0.278918 0.576980 1251.697931 \n\n tar_elo2_ts_avg tar_oddsprob_home_ts_avg tar_oddsprob_draw_ts_avg \\\n67788 1351.429943 NaN NaN \n135578 NaN NaN NaN \n\n tar_oddsprob_away_ts_avg tar_drift_home_ts_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n tar_drift_away_ts_avg tar_drift_draw_ts_avg opp_w1_ts_avg \\\n67788 NaN NaN 0.191489 \n135578 NaN NaN 0.500000 \n\n opp_wx_ts_avg opp_w2_ts_avg opp_ht1_ts_avg opp_ht2_ts_avg \\\n67788 0.340426 0.468085 0.340426 0.702128 \n135578 0.204545 0.295455 0.727273 0.477273 \n\n opp_ft1_ts_avg opp_ft2_ts_avg opp_ps_ht_ts_avg opp_ps_ft_ts_avg \\\n67788 0.936170 1.574468 -0.361702 -0.638298 \n135578 1.715909 1.056818 0.250000 0.659091 \n\n opp_vote1_ts_avg opp_votex_ts_avg opp_vote2_ts_avg opp_elo1_ts_avg \\\n67788 0.144103 0.278918 0.576980 1251.697931 \n135578 0.517677 0.237200 0.245123 1413.419866 \n\n opp_elo2_ts_avg opp_oddsprob_home_ts_avg opp_oddsprob_draw_ts_avg \\\n67788 NaN NaN NaN 
\n135578 1351.429943 NaN NaN \n\n opp_oddsprob_away_ts_avg opp_drift_home_ts_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n opp_drift_away_ts_avg opp_drift_draw_ts_avg _diff_w1_tt_avg \\\n67788 NaN NaN 0.14531 \n135578 NaN NaN -0.14531 \n\n _diff_wx_tt_avg _diff_w2_tt_avg _diff_ht1_tt_avg _diff_ht2_tt_avg \\\n67788 -0.069979 -0.075331 0.166186 0.048456 \n135578 0.069979 0.075331 -0.166186 -0.048456 \n\n _diff_ft1_tt_avg _diff_ft2_tt_avg _diff_ps_ht_tt_avg \\\n67788 0.420288 -0.134431 0.11773 \n135578 -0.420288 0.134431 -0.11773 \n\n _diff_ps_ft_tt_avg _diff_vote1_tt_avg _diff_votex_tt_avg \\\n67788 0.554719 0.159138 -0.040315 \n135578 -0.554719 -0.159138 0.040315 \n\n _diff_vote2_tt_avg _diff_elo1_tt_avg _diff_elo2_tt_avg \\\n67788 -0.118823 167.228859 69.868294 \n135578 0.118823 -167.228859 -69.868294 \n\n _diff_oddsprob_home_tt_avg _diff_oddsprob_draw_tt_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n _diff_oddsprob_away_tt_avg _diff_drift_home_tt_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n _diff_drift_away_tt_avg _diff_drift_draw_tt_avg _diff_w1_ts_avg \\\n67788 NaN NaN 0.308511 \n135578 NaN NaN -0.308511 \n\n _diff_wx_ts_avg _diff_w2_ts_avg _diff_ht1_ts_avg _diff_ht2_ts_avg \\\n67788 -0.13588 -0.172631 0.386847 -0.224855 \n135578 0.13588 0.172631 -0.386847 0.224855 \n\n _diff_ft1_ts_avg _diff_ft2_ts_avg _diff_ps_ht_ts_avg \\\n67788 0.779739 -0.51765 0.611702 \n135578 -0.779739 0.51765 -0.611702 \n\n _diff_ps_ft_ts_avg _diff_vote1_ts_avg _diff_votex_ts_avg \\\n67788 1.297389 0.373574 -0.041718 \n135578 -1.297389 -0.373574 0.041718 \n\n _diff_vote2_ts_avg _diff_elo1_ts_avg _diff_elo2_ts_avg \\\n67788 -0.331856 161.721936 NaN \n135578 0.331856 -161.721936 NaN \n\n _diff_oddsprob_home_ts_avg _diff_oddsprob_draw_ts_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n _diff_oddsprob_away_ts_avg _diff_drift_home_ts_avg \\\n67788 NaN NaN \n135578 NaN NaN \n\n _diff_drift_away_ts_avg _diff_drift_draw_ts_avg diff_vote12 \\\n67788 NaN NaN 0.55522 \n135578 NaN NaN -0.55522 \n\n diff_elo 
diff_op \n67788 67.209839 NaN \n135578 -67.209839 NaN ",
+ "text/html": "
\n\n
\n \n \n \n country \n liga \n mid \n round \n ds \n t1 \n t2 \n tid1 \n tid2 \n w1 \n wx \n w2 \n ft1 \n ft2 \n winner \n odds_away \n odds_draw \n odds_home \n country_id \n round \n ds \n de \n form1 \n form2 \n vote1 \n votex \n vote2 \n pop_r \n elo1 \n elo2 \n oddsprob_home \n oddsprob_draw \n oddsprob_away \n drift_home \n drift_away \n drift_draw \n tar_w1_tt_avg \n tar_wx_tt_avg \n tar_w2_tt_avg \n tar_ht1_tt_avg \n tar_ht2_tt_avg \n tar_ft1_tt_avg \n tar_ft2_tt_avg \n tar_ps_ht_tt_avg \n tar_ps_ft_tt_avg \n tar_vote1_tt_avg \n tar_votex_tt_avg \n tar_vote2_tt_avg \n tar_elo1_tt_avg \n tar_elo2_tt_avg \n tar_oddsprob_home_tt_avg \n tar_oddsprob_draw_tt_avg \n tar_oddsprob_away_tt_avg \n tar_drift_home_tt_avg \n tar_drift_away_tt_avg \n tar_drift_draw_tt_avg \n opp_w1_tt_avg \n opp_wx_tt_avg \n opp_w2_tt_avg \n opp_ht1_tt_avg \n opp_ht2_tt_avg \n opp_ft1_tt_avg \n opp_ft2_tt_avg \n opp_ps_ht_tt_avg \n opp_ps_ft_tt_avg \n opp_vote1_tt_avg \n opp_votex_tt_avg \n opp_vote2_tt_avg \n opp_elo1_tt_avg \n opp_elo2_tt_avg \n opp_oddsprob_home_tt_avg \n opp_oddsprob_draw_tt_avg \n opp_oddsprob_away_tt_avg \n opp_drift_home_tt_avg \n opp_drift_away_tt_avg \n opp_drift_draw_tt_avg \n tar_w1_ts_avg \n tar_wx_ts_avg \n tar_w2_ts_avg \n tar_ht1_ts_avg \n tar_ht2_ts_avg \n tar_ft1_ts_avg \n tar_ft2_ts_avg \n tar_ps_ht_ts_avg \n tar_ps_ft_ts_avg \n tar_vote1_ts_avg \n tar_votex_ts_avg \n tar_vote2_ts_avg \n tar_elo1_ts_avg \n tar_elo2_ts_avg \n tar_oddsprob_home_ts_avg \n tar_oddsprob_draw_ts_avg \n tar_oddsprob_away_ts_avg \n tar_drift_home_ts_avg \n tar_drift_away_ts_avg \n tar_drift_draw_ts_avg \n opp_w1_ts_avg \n opp_wx_ts_avg \n opp_w2_ts_avg \n opp_ht1_ts_avg \n opp_ht2_ts_avg \n opp_ft1_ts_avg \n opp_ft2_ts_avg \n opp_ps_ht_ts_avg \n opp_ps_ft_ts_avg \n opp_vote1_ts_avg \n opp_votex_ts_avg \n opp_vote2_ts_avg \n opp_elo1_ts_avg \n opp_elo2_ts_avg \n opp_oddsprob_home_ts_avg \n opp_oddsprob_draw_ts_avg \n opp_oddsprob_away_ts_avg \n opp_drift_home_ts_avg \n 
opp_drift_away_ts_avg \n opp_drift_draw_ts_avg \n _diff_w1_tt_avg \n _diff_wx_tt_avg \n _diff_w2_tt_avg \n _diff_ht1_tt_avg \n _diff_ht2_tt_avg \n _diff_ft1_tt_avg \n _diff_ft2_tt_avg \n _diff_ps_ht_tt_avg \n _diff_ps_ft_tt_avg \n _diff_vote1_tt_avg \n _diff_votex_tt_avg \n _diff_vote2_tt_avg \n _diff_elo1_tt_avg \n _diff_elo2_tt_avg \n _diff_oddsprob_home_tt_avg \n _diff_oddsprob_draw_tt_avg \n _diff_oddsprob_away_tt_avg \n _diff_drift_home_tt_avg \n _diff_drift_away_tt_avg \n _diff_drift_draw_tt_avg \n _diff_w1_ts_avg \n _diff_wx_ts_avg \n _diff_w2_ts_avg \n _diff_ht1_ts_avg \n _diff_ht2_ts_avg \n _diff_ft1_ts_avg \n _diff_ft2_ts_avg \n _diff_ps_ht_ts_avg \n _diff_ps_ft_ts_avg \n _diff_vote1_ts_avg \n _diff_votex_ts_avg \n _diff_vote2_ts_avg \n _diff_elo1_ts_avg \n _diff_elo2_ts_avg \n _diff_oddsprob_home_ts_avg \n _diff_oddsprob_draw_ts_avg \n _diff_oddsprob_away_ts_avg \n _diff_drift_home_ts_avg \n _diff_drift_away_ts_avg \n _diff_drift_draw_ts_avg \n diff_vote12 \n diff_elo \n diff_op \n \n \n \n \n 67788 \n romania \n liga-i \n 9270007 \n 12 \n 2021-01-10 17:00:00+00:00 \n fc viitorul constanta \n fc hermannstadt \n 594 \n 1499 \n 1 \n 0 \n 0 \n 2.0 \n 1.0 \n home \n NaN \n NaN \n NaN \n 34 \n 12 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 18 \n 14 \n 0.623808 \n 0.307603 \n 0.068589 \n 0 \n 1352.687866 \n 1285.478027 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.418994 \n 0.245810 \n 0.335196 \n 0.597765 \n 0.564246 \n 1.441341 \n 1.223464 \n 0.033520 \n 0.217877 \n 0.421861 \n 0.246587 \n 0.331553 \n 1405.488915 \n 1350.262910 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.273684 \n 0.315789 \n 0.410526 \n 0.431579 \n 0.515789 \n 1.021053 \n 1.357895 \n -0.084211 \n -0.336842 \n 0.262723 \n 0.286901 \n 0.450376 \n 1238.260056 \n 1280.394616 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.500000 \n 0.204545 \n 0.295455 \n 0.727273 \n 0.477273 \n 1.715909 \n 1.056818 \n 0.250000 \n 0.659091 \n 0.517677 \n 0.237200 \n 0.245123 \n 1413.419866 \n 1351.429943 
\n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.191489 \n 0.340426 \n 0.468085 \n 0.340426 \n 0.702128 \n 0.936170 \n 1.574468 \n -0.361702 \n -0.638298 \n 0.144103 \n 0.278918 \n 0.576980 \n 1251.697931 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.14531 \n -0.069979 \n -0.075331 \n 0.166186 \n 0.048456 \n 0.420288 \n -0.134431 \n 0.11773 \n 0.554719 \n 0.159138 \n -0.040315 \n -0.118823 \n 167.228859 \n 69.868294 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.308511 \n -0.13588 \n -0.172631 \n 0.386847 \n -0.224855 \n 0.779739 \n -0.51765 \n 0.611702 \n 1.297389 \n 0.373574 \n -0.041718 \n -0.331856 \n 161.721936 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.55522 \n 67.209839 \n NaN \n \n \n 135578 \n romania \n liga-i \n 9270007 \n 12 \n 2021-01-10 17:00:00+00:00 \n fc hermannstadt \n fc viitorul constanta \n 1499 \n 594 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n home \n NaN \n NaN \n NaN \n 34 \n 12 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 14 \n 18 \n 0.068589 \n 0.307603 \n 0.623808 \n 0 \n 1285.478027 \n 1352.687866 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.273684 \n 0.315789 \n 0.410526 \n 0.431579 \n 0.515789 \n 1.021053 \n 1.357895 \n -0.084211 \n -0.336842 \n 0.262723 \n 0.286901 \n 0.450376 \n 1238.260056 \n 1280.394616 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.418994 \n 0.245810 \n 0.335196 \n 0.597765 \n 0.564246 \n 1.441341 \n 1.223464 \n 0.033520 \n 0.217877 \n 0.421861 \n 0.246587 \n 0.331553 \n 1405.488915 \n 1350.262910 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.191489 \n 0.340426 \n 0.468085 \n 0.340426 \n 0.702128 \n 0.936170 \n 1.574468 \n -0.361702 \n -0.638298 \n 0.144103 \n 0.278918 \n 0.576980 \n 1251.697931 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 0.500000 \n 0.204545 \n 0.295455 \n 0.727273 \n 0.477273 \n 1.715909 \n 1.056818 \n 0.250000 \n 0.659091 \n 0.517677 \n 0.237200 \n 0.245123 \n 1413.419866 \n 1351.429943 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.14531 \n 0.069979 \n 0.075331 \n -0.166186 
\n -0.048456 \n -0.420288 \n 0.134431 \n -0.11773 \n -0.554719 \n -0.159138 \n 0.040315 \n 0.118823 \n -167.228859 \n -69.868294 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.308511 \n 0.13588 \n 0.172631 \n -0.386847 \n 0.224855 \n -0.779739 \n 0.51765 \n -0.611702 \n -1.297389 \n -0.373574 \n 0.041718 \n 0.331856 \n -161.721936 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n -0.55522 \n -67.209839 \n NaN \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "display(df_[df_.mid==id])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ds mid tid w1_th_avg wx_th_avg \\\n",
+ "67789 2021-01-10 15:00:00+00:00 8736289 685 0.433333 0.133333 \n",
+ "\n",
+ " w2_th_avg ht1_th_avg ht2_th_avg ft1_th_avg ft2_th_avg \\\n",
+ "67789 0.433333 0.6 0.866667 1.3 1.566667 \n",
+ "\n",
+ " ps_ht_th_avg ps_ft_th_avg vote1_th_avg votex_th_avg vote2_th_avg \\\n",
+ "67789 -0.266667 -0.266667 0.384568 0.214804 0.400628 \n",
+ "\n",
+ " elo1_th_avg elo2_th_avg oddsprob_home_th_avg oddsprob_draw_th_avg \\\n",
+ "67789 1030.232308 1012.008004 0.162159 0.124672 \n",
+ "\n",
+ " oddsprob_away_th_avg drift_home_th_avg drift_away_th_avg \\\n",
+ "67789 0.213169 -0.009556 0.01476 \n",
+ "\n",
+ " drift_draw_th_avg \n",
+ "67789 0.011214 "
+ ],
+ "text/html": "
\n\n
\n \n \n \n ds \n mid \n tid \n w1_th_avg \n wx_th_avg \n w2_th_avg \n ht1_th_avg \n ht2_th_avg \n ft1_th_avg \n ft2_th_avg \n ps_ht_th_avg \n ps_ft_th_avg \n vote1_th_avg \n votex_th_avg \n vote2_th_avg \n elo1_th_avg \n elo2_th_avg \n oddsprob_home_th_avg \n oddsprob_draw_th_avg \n oddsprob_away_th_avg \n drift_home_th_avg \n drift_away_th_avg \n drift_draw_th_avg \n \n \n \n \n 67789 \n 2021-01-10 15:00:00+00:00 \n 8736289 \n 685 \n 0.433333 \n 0.133333 \n 0.433333 \n 0.6 \n 0.866667 \n 1.3 \n 1.566667 \n -0.266667 \n -0.266667 \n 0.384568 \n 0.214804 \n 0.400628 \n 1030.232308 \n 1012.008004 \n 0.162159 \n 0.124672 \n 0.213169 \n -0.009556 \n 0.01476 \n 0.011214 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 67
+ }
+ ],
+ "source": [
+ "df_home[df_home.mid==8736289]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ds mid tid w1_ta_avg wx_ta_avg \\\n",
+ "67789 2021-01-10 15:00:00+00:00 8736289 306 0.27027 0.243243 \n",
+ "\n",
+ " w2_ta_avg ht1_ta_avg ht2_ta_avg ft1_ta_avg ft2_ta_avg \\\n",
+ "67789 0.486486 0.558559 0.918919 1.216216 1.774775 \n",
+ "\n",
+ " ps_ht_ta_avg ps_ft_ta_avg vote1_ta_avg votex_ta_avg vote2_ta_avg \\\n",
+ "67789 -0.36036 -0.558559 0.238312 0.197249 0.564439 \n",
+ "\n",
+ " elo1_ta_avg elo2_ta_avg oddsprob_home_ta_avg oddsprob_draw_ta_avg \\\n",
+ "67789 1024.520863 1114.002228 NaN NaN \n",
+ "\n",
+ " oddsprob_away_ta_avg drift_home_ta_avg drift_away_ta_avg \\\n",
+ "67789 NaN NaN NaN \n",
+ "\n",
+ " drift_draw_ta_avg \n",
+ "67789 NaN "
+ ],
+ "text/html": "
\n\n
\n \n \n \n ds \n mid \n tid \n w1_ta_avg \n wx_ta_avg \n w2_ta_avg \n ht1_ta_avg \n ht2_ta_avg \n ft1_ta_avg \n ft2_ta_avg \n ps_ht_ta_avg \n ps_ft_ta_avg \n vote1_ta_avg \n votex_ta_avg \n vote2_ta_avg \n elo1_ta_avg \n elo2_ta_avg \n oddsprob_home_ta_avg \n oddsprob_draw_ta_avg \n oddsprob_away_ta_avg \n drift_home_ta_avg \n drift_away_ta_avg \n drift_draw_ta_avg \n \n \n \n \n 67789 \n 2021-01-10 15:00:00+00:00 \n 8736289 \n 306 \n 0.27027 \n 0.243243 \n 0.486486 \n 0.558559 \n 0.918919 \n 1.216216 \n 1.774775 \n -0.36036 \n -0.558559 \n 0.238312 \n 0.197249 \n 0.564439 \n 1024.520863 \n 1114.002228 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 68
+ }
+ ],
+ "source": [
+ "df_away[df_away.mid==8736289]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_.to_csv('data/stats_generated.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv('data/stats_generated.csv', index_col=None)\n",
+ "df=df.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "COL_CUR=['side', 'country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']\n",
+ "COL_PREV=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft']\n",
+ "COL_CAT=['country_id','form1', 'form2']\n",
+ "COL_BIN=['side']\n",
+ "\n",
+ "COL_INF=['country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2', 'ft1', 'ft2','winner']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "472463 0.272727 0.227273 \n",
+ "130138 0.00 0.269385 0.329496 0.273684 0.315789 \n",
+ "130139 0.50 0.639889 0.401371 0.525316 0.215190 \n",
+ "\n",
+ " tar_w2_tt_avg tar_ht1_tt_avg tar_ht2_tt_avg tar_ft1_tt_avg \\\n",
+ "0 0.000000 0.571429 0.000000 0.500000 \n",
+ "1 0.000000 0.000000 0.000000 0.333333 \n",
+ "2 0.000000 0.857143 0.000000 1.000000 \n",
+ "3 0.000000 0.571429 0.000000 0.666667 \n",
+ "4 1.000000 0.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.491667 0.121429 0.130000 0.161111 \n",
+ "130136 0.434343 0.144300 0.131313 0.180135 \n",
+ "130137 0.500000 0.103896 0.127273 0.166667 \n",
+ "130138 0.410526 0.123308 0.103158 0.170175 \n",
+ "130139 0.259494 0.251356 0.103797 0.297468 \n",
+ "\n",
+ " tar_ft2_tt_avg tar_ps_ht_tt_avg tar_ps_ft_tt_avg tar_vote1_tt_avg \\\n",
+ "0 0.000000 0.823529 0.782609 0.858299 \n",
+ "1 0.000000 0.588235 0.695652 0.085197 \n",
+ "2 0.166667 0.941176 0.956522 0.874874 \n",
+ "3 0.000000 0.823529 0.869565 0.858765 \n",
+ "4 0.166667 0.588235 0.434783 0.121502 \n",
+ "... ... ... ... ... \n",
+ "130135 0.236111 0.561765 0.482609 0.230542 \n",
+ "130136 0.232323 0.570410 0.494510 0.284858 \n",
+ "130137 0.242424 0.556150 0.482213 0.201240 \n",
+ "130138 0.226316 0.578328 0.492449 0.238697 \n",
+ "130139 0.191983 0.630678 0.576775 0.591648 \n",
+ "\n",
+ " tar_votex_tt_avg tar_vote2_tt_avg tar_elo1_tt_avg tar_elo2_tt_avg \\\n",
+ "0 0.162021 0.104519 1.000000 0.635697 \n",
+ "1 0.391887 0.799607 0.464370 0.471198 \n",
+ "2 0.211592 0.060606 0.464370 0.471198 \n",
+ "3 0.293601 0.033987 0.464370 0.471198 \n",
+ "4 0.220921 0.852222 0.464370 0.471198 \n",
+ "... ... ... ... ... \n",
+ "130135 0.609309 0.530193 0.355792 0.456196 \n",
+ "130136 0.538426 0.510493 0.409076 0.472245 \n",
+ "130137 0.682482 0.522227 0.464370 0.471198 \n",
+ "130138 0.657029 0.496170 0.247452 0.328089 \n",
+ "130139 0.444651 0.236022 0.419905 0.396232 \n",
+ "\n",
+ " opp_w1_tt_avg opp_wx_tt_avg opp_w2_tt_avg opp_ht1_tt_avg \\\n",
+ "0 0.708861 0.189873 0.101266 0.211573 \n",
+ "1 0.442748 0.282443 0.274809 0.196292 \n",
+ "2 0.408333 0.266667 0.325000 0.180952 \n",
+ "3 0.379032 0.274194 0.346774 0.168203 \n",
+ "4 0.000000 0.000000 1.000000 0.095238 \n",
+ "... ... ... ... ... \n",
+ "130135 0.250000 0.250000 0.500000 0.107143 \n",
+ "130136 0.250000 0.250000 0.500000 0.095238 \n",
+ "130137 0.714286 0.142857 0.142857 0.163265 \n",
+ "130138 0.500000 0.204545 0.295455 0.207792 \n",
+ "130139 0.433333 0.133333 0.433333 0.171429 \n",
+ "\n",
+ " opp_ht2_tt_avg opp_ft1_tt_avg opp_ft2_tt_avg opp_ps_ht_tt_avg \\\n",
+ "0 0.041772 0.292194 0.094937 0.650782 \n",
+ "1 0.103817 0.258270 0.213740 0.607993 \n",
+ "2 0.120000 0.229167 0.225000 0.592157 \n",
+ "3 0.087097 0.206989 0.181452 0.606262 \n",
+ "4 0.133333 0.055556 0.277778 0.549020 \n",
+ "... ... ... ... ... \n",
+ "130135 0.075000 0.187500 0.208333 0.588235 \n",
+ "130136 0.233333 0.208333 0.347222 0.490196 \n",
+ "130137 0.057143 0.238095 0.166667 0.621849 \n",
+ "130138 0.095455 0.285985 0.176136 0.617647 \n",
+ "130139 0.173333 0.216667 0.261111 0.556863 \n",
+ "\n",
+ " opp_ps_ft_tt_avg opp_vote1_tt_avg opp_votex_tt_avg \\\n",
+ "0 0.624656 0.708244 0.362246 \n",
+ "1 0.544972 0.475809 0.485643 \n",
+ "2 0.523913 0.424758 0.522580 \n",
+ "3 0.535063 0.489853 0.558270 \n",
+ "4 0.405797 0.245585 0.347700 \n",
+ "... ... ... ... \n",
+ "130135 0.510870 0.433433 0.623915 \n",
+ "130136 0.449275 0.200614 0.665290 \n",
+ "130137 0.559006 0.490831 0.714909 \n",
+ "130138 0.579051 0.532626 0.543209 \n",
+ "130139 0.498551 0.379169 0.491921 \n",
+ "\n",
+ " opp_vote2_tt_avg opp_elo1_tt_avg opp_elo2_tt_avg tar_w1_ts_avg \\\n",
+ "0 0.156602 0.836029 0.678962 1.000000 \n",
+ "1 0.336685 0.648486 0.676920 1.000000 \n",
+ "2 0.371003 0.468630 0.516784 1.000000 \n",
+ "3 0.283177 0.581739 0.598190 1.000000 \n",
+ "4 0.653541 0.464370 0.471198 0.000000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.307892 0.464370 0.471198 0.283333 \n",
+ "130136 0.532040 0.464370 0.471198 0.224490 \n",
+ "130137 0.198766 0.464370 0.471198 0.333333 \n",
+ "130138 0.245968 0.385910 0.368423 0.191489 \n",
+ "130139 0.435527 0.388752 0.427319 0.486486 \n",
+ "\n",
+ " tar_wx_ts_avg tar_w2_ts_avg tar_ht1_ts_avg tar_ht2_ts_avg \\\n",
+ "0 0.000000 0.000000 0.500000 0.000000 \n",
+ "1 0.000000 0.000000 0.000000 0.000000 \n",
+ "2 0.000000 0.000000 0.750000 0.000000 \n",
+ "3 0.000000 0.000000 0.500000 0.000000 \n",
+ "4 0.000000 1.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.216667 0.500000 0.091667 0.123333 \n",
+ "130136 0.285714 0.489796 0.096939 0.163265 \n",
+ "130137 0.111111 0.555556 0.166667 0.088889 \n",
+ "130138 0.340426 0.468085 0.085106 0.140426 \n",
+ "130139 0.243243 0.270270 0.229730 0.111712 \n",
+ "\n",
+ " tar_ft1_ts_avg tar_ft2_ts_avg tar_ps_ht_ts_avg tar_ps_ft_ts_avg \\\n",
+ "0 0.500000 0.000000 0.777778 0.785714 \n",
+ "1 0.333333 0.000000 0.555556 0.714286 \n",
+ "2 1.000000 0.125000 0.888889 0.928571 \n",
+ "3 0.666667 0.000000 0.777778 0.857143 \n",
+ "4 0.000000 0.125000 0.555556 0.500000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.144444 0.170833 0.527778 0.535714 \n",
+ "130136 0.156463 0.196429 0.507937 0.526239 \n",
+ "130137 0.166667 0.194444 0.580247 0.531746 \n",
+ "130138 0.156028 0.196809 0.515366 0.525836 \n",
+ "130139 0.295796 0.152027 0.595596 0.611326 \n",
+ "\n",
+ " tar_vote1_ts_avg tar_votex_ts_avg tar_vote2_ts_avg tar_elo1_ts_avg \\\n",
+ "0 0.861679 0.162021 0.097464 0.995360 \n",
+ "1 0.107016 0.391887 0.745632 0.470917 \n",
+ "2 0.877859 0.211592 0.056515 0.470917 \n",
+ "3 0.862134 0.293601 0.031693 0.470917 \n",
+ "4 0.142456 0.220921 0.794695 0.470917 \n",
+ "... ... ... ... ... \n",
+ "130135 0.136381 0.592159 0.616564 0.365543 \n",
+ "130136 0.160630 0.538090 0.618908 0.419868 \n",
+ "130137 0.083622 0.639458 0.646376 0.470917 \n",
+ "130138 0.123363 0.638746 0.606588 0.256519 \n",
+ "130139 0.596398 0.451718 0.221622 0.416652 \n",
+ "\n",
+ " tar_elo2_ts_avg opp_w1_ts_avg opp_wx_ts_avg opp_w2_ts_avg \\\n",
+ "0 0.635697 0.000000 0.000000 1.000000 \n",
+ "1 0.471198 1.000000 0.000000 0.000000 \n",
+ "2 0.471198 0.000000 0.000000 1.000000 \n",
+ "3 0.471198 0.000000 0.000000 1.000000 \n",
+ "4 0.471198 0.000000 0.000000 1.000000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.450849 0.250000 0.250000 0.500000 \n",
+ "130136 0.474626 0.250000 0.250000 0.500000 \n",
+ "130137 0.471198 0.714286 0.142857 0.142857 \n",
+ "130138 0.330758 0.500000 0.204545 0.295455 \n",
+ "130139 0.358518 0.433333 0.133333 0.433333 \n",
+ "\n",
+ " opp_ht1_ts_avg opp_ht2_ts_avg opp_ft1_ts_avg opp_ft2_ts_avg \\\n",
+ "0 0.000000 0.400000 0.166667 0.375000 \n",
+ "1 0.000000 0.000000 0.333333 0.000000 \n",
+ "2 0.000000 0.600000 0.166667 0.750000 \n",
+ "3 0.500000 0.000000 0.333333 0.375000 \n",
+ "4 0.000000 0.200000 0.000000 0.250000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.093750 0.075000 0.187500 0.156250 \n",
+ "130136 0.083333 0.233333 0.208333 0.260417 \n",
+ "130137 0.142857 0.057143 0.238095 0.125000 \n",
+ "130138 0.181818 0.095455 0.285985 0.132102 \n",
+ "130139 0.150000 0.173333 0.216667 0.195833 \n",
+ "\n",
+ " opp_ps_ht_ts_avg opp_ps_ft_ts_avg opp_vote1_ts_avg \\\n",
+ "0 0.333333 0.428571 0.332264 \n",
+ "1 0.555556 0.714286 0.122613 \n",
+ "2 0.222222 0.214286 0.065924 \n",
+ "3 0.777778 0.500000 0.674786 \n",
+ "4 0.444444 0.428571 0.062467 \n",
+ "... ... ... ... \n",
+ "130135 0.555556 0.562500 0.446946 \n",
+ "130136 0.462963 0.511905 0.219680 \n",
+ "130137 0.587302 0.602041 0.502975 \n",
+ "130138 0.583333 0.618506 0.543774 \n",
+ "130139 0.525926 0.552381 0.393976 \n",
+ "\n",
+ " opp_votex_ts_avg opp_vote2_ts_avg opp_elo1_ts_avg opp_elo2_ts_avg \\\n",
+ "0 0.305874 0.560808 0.892168 0.931487 \n",
+ "1 0.410600 0.720589 0.470917 0.471198 \n",
+ "2 0.211592 0.876629 0.470917 0.471198 \n",
+ "3 0.450061 0.143268 0.470917 0.471198 \n",
+ "4 0.153514 0.908948 0.470917 0.471198 \n",
+ "... ... ... ... ... \n",
+ "130135 0.623915 0.287108 0.470917 0.471198 \n",
+ "130136 0.665290 0.496126 0.470917 0.471198 \n",
+ "130137 0.714909 0.185349 0.470917 0.471198 \n",
+ "130138 0.543209 0.229365 0.394096 0.368423 \n",
+ "130139 0.491921 0.406128 0.396879 0.427319 \n",
+ "\n",
+ " diff_w1_tt_avg diff_wx_tt_avg diff_w2_tt_avg diff_ht1_tt_avg \\\n",
+ "0 0.645570 0.405063 0.449367 0.709916 \n",
+ "1 0.778626 0.358779 0.362595 0.385496 \n",
+ "2 0.795833 0.366667 0.337500 0.894444 \n",
+ "3 0.810484 0.362903 0.326613 0.735215 \n",
+ "4 0.500000 0.500000 0.500000 0.444444 \n",
+ "... ... ... ... ... \n",
+ "130135 0.525000 0.479167 0.495833 0.508333 \n",
+ "130136 0.526515 0.506313 0.467172 0.528620 \n",
+ "130137 0.279221 0.542208 0.678571 0.465368 \n",
+ "130138 0.386842 0.555622 0.557536 0.450718 \n",
+ "130139 0.545992 0.540928 0.413080 0.546624 \n",
+ "\n",
+ " diff_ht2_tt_avg diff_ft1_tt_avg diff_ft2_tt_avg diff_ps_ht_tt_avg \\\n",
+ "0 0.476503 0.624684 0.442814 0.640587 \n",
+ "1 0.441603 0.545038 0.371251 0.483921 \n",
+ "2 0.432500 0.962500 0.464862 0.784043 \n",
+ "3 0.451008 0.775806 0.390700 0.676819 \n",
+ "4 0.425000 0.466667 0.433071 0.531915 \n",
+ "... ... ... ... ... \n",
+ "130135 0.530937 0.484167 0.516732 0.478457 \n",
+ "130136 0.442614 0.483081 0.430789 0.565280 \n",
+ "130137 0.539448 0.457143 0.545634 0.446532 \n",
+ "130138 0.504333 0.430514 0.530226 0.468001 \n",
+ "130139 0.460886 0.548481 0.458360 0.560073 \n",
+ "\n",
+ " diff_ps_ft_tt_avg diff_vote1_tt_avg diff_votex_tt_avg \\\n",
+ "0 0.618465 0.598509 0.322713 \n",
+ "1 0.613010 0.243569 0.416984 \n",
+ "2 0.824457 0.795494 0.224639 \n",
+ "3 0.750877 0.742185 0.265652 \n",
+ "4 0.521739 0.418542 0.387746 \n",
+ "... ... ... ... \n",
+ "130135 0.478804 0.366806 0.487068 \n",
+ "130136 0.533926 0.555305 0.387670 \n",
+ "130137 0.442405 0.309888 0.471288 \n",
+ "130138 0.435048 0.307040 0.600781 \n",
+ "130139 0.558668 0.639490 0.458146 \n",
+ "\n",
+ " diff_vote2_tt_avg diff_elo1_tt_avg diff_elo2_tt_avg diff_w1_ts_avg \\\n",
+ "0 0.469866 0.647809 0.455834 1.000000 \n",
+ "1 0.767833 0.334032 0.289995 0.500000 \n",
+ "2 0.320414 0.496160 0.453465 1.000000 \n",
+ "3 0.355826 0.394199 0.370364 1.000000 \n",
+ "4 0.614952 0.500000 0.500000 0.500000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.628617 0.402124 0.484687 0.516667 \n",
+ "130136 0.487533 0.450157 0.501069 0.487245 \n",
+ "130137 0.687145 0.500000 0.500000 0.309524 \n",
+ "130138 0.644760 0.375189 0.458826 0.345745 \n",
+ "130139 0.384572 0.528082 0.468265 0.526577 \n",
+ "\n",
+ " diff_wx_ts_avg diff_w2_ts_avg diff_ht1_ts_avg diff_ht2_ts_avg \\\n",
+ "0 0.500000 0.000000 0.750000 0.300000 \n",
+ "1 0.500000 0.500000 0.500000 0.500000 \n",
+ "2 0.500000 0.000000 0.875000 0.200000 \n",
+ "3 0.500000 0.000000 0.500000 0.500000 \n",
+ "4 0.500000 0.500000 0.500000 0.400000 \n",
+ "... ... ... ... ... \n",
+ "130135 0.483333 0.500000 0.498958 0.524167 \n",
+ "130136 0.517857 0.494898 0.506803 0.464966 \n",
+ "130137 0.484127 0.706349 0.511905 0.515873 \n",
+ "130138 0.567940 0.586315 0.451644 0.522485 \n",
+ "130139 0.554955 0.418468 0.539865 0.469189 \n",
+ "\n",
+ " diff_ft1_ts_avg diff_ft2_ts_avg diff_ps_ht_ts_avg \\\n",
+ "0 0.666667 0.306452 0.833333 \n",
+ "1 0.500000 0.500000 0.500000 \n",
+ "2 0.916667 0.177419 1.000000 \n",
+ "3 0.666667 0.306452 0.500000 \n",
+ "4 0.500000 0.435484 0.583333 \n",
+ "... ... ... ... \n",
+ "130135 0.478472 0.507527 0.479167 \n",
+ "130136 0.474065 0.466974 0.533730 \n",
+ "130137 0.464286 0.535842 0.494709 \n",
+ "130138 0.435022 0.533397 0.449025 \n",
+ "130139 0.539565 0.477390 0.552252 \n",
+ "\n",
+ " diff_ps_ft_ts_avg diff_vote1_ts_avg diff_votex_ts_avg \\\n",
+ "0 0.750000 0.785620 0.368704 \n",
+ "1 0.500000 0.491586 0.482920 \n",
+ "2 1.000000 0.938040 0.500000 \n",
+ "3 0.750000 0.601075 0.357198 \n",
+ "4 0.550000 0.543154 0.561524 \n",
+ "... ... ... ... \n",
+ "130135 0.481250 0.332450 0.471017 \n",
+ "130136 0.510034 0.468142 0.383904 \n",
+ "130137 0.450794 0.273758 0.431135 \n",
+ "130138 0.435131 0.273188 0.587198 \n",
+ "130139 0.541261 0.609207 0.463307 \n",
+ "\n",
+ " diff_vote2_ts_avg diff_elo1_ts_avg diff_elo2_ts_avg diff_vote12 \\\n",
+ "0 0.246279 0.596018 0.186188 0.725685 \n",
+ "1 0.513713 0.500000 0.500000 0.626219 \n",
+ "2 0.050917 0.500000 0.500000 0.878608 \n",
+ "3 0.438903 0.500000 0.500000 0.810986 \n",
+ "4 0.437437 0.500000 0.500000 0.699136 \n",
+ "... ... ... ... ... \n",
+ "130135 0.680405 0.401952 0.478411 0.706570 \n",
+ "130136 0.567234 0.452500 0.503638 0.458511 \n",
+ "130137 0.752452 0.500000 0.500000 0.215050 \n",
+ "130138 0.706563 0.371987 0.460040 0.213806 \n",
+ "130139 0.398967 0.518398 0.427007 0.766734 \n",
+ "\n",
+ " diff_elo \n",
+ "0 0.500000 \n",
+ "1 0.472435 \n",
+ "2 0.765927 \n",
+ "3 0.554411 \n",
+ "4 0.500000 \n",
+ "... ... \n",
+ "130135 0.500000 \n",
+ "130136 0.500000 \n",
+ "130137 0.500000 \n",
+ "130138 0.444451 \n",
+ "130139 0.720416 \n",
+ "\n",
+ "[130140 rows x 89 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n pop_r \n elo1 \n elo2 \n tar_w1_tt_avg \n tar_wx_tt_avg \n tar_w2_tt_avg \n tar_ht1_tt_avg \n tar_ht2_tt_avg \n tar_ft1_tt_avg \n tar_ft2_tt_avg \n tar_ps_ht_tt_avg \n tar_ps_ft_tt_avg \n tar_vote1_tt_avg \n tar_votex_tt_avg \n tar_vote2_tt_avg \n tar_elo1_tt_avg \n tar_elo2_tt_avg \n opp_w1_tt_avg \n opp_wx_tt_avg \n opp_w2_tt_avg \n opp_ht1_tt_avg \n opp_ht2_tt_avg \n opp_ft1_tt_avg \n opp_ft2_tt_avg \n opp_ps_ht_tt_avg \n opp_ps_ft_tt_avg \n opp_vote1_tt_avg \n opp_votex_tt_avg \n opp_vote2_tt_avg \n opp_elo1_tt_avg \n opp_elo2_tt_avg \n tar_w1_ts_avg \n tar_wx_ts_avg \n tar_w2_ts_avg \n tar_ht1_ts_avg \n tar_ht2_ts_avg \n tar_ft1_ts_avg \n tar_ft2_ts_avg \n tar_ps_ht_ts_avg \n tar_ps_ft_ts_avg \n tar_vote1_ts_avg \n tar_votex_ts_avg \n tar_vote2_ts_avg \n tar_elo1_ts_avg \n tar_elo2_ts_avg \n opp_w1_ts_avg \n opp_wx_ts_avg \n opp_w2_ts_avg \n opp_ht1_ts_avg \n opp_ht2_ts_avg \n opp_ft1_ts_avg \n opp_ft2_ts_avg \n opp_ps_ht_ts_avg \n opp_ps_ft_ts_avg \n opp_vote1_ts_avg \n opp_votex_ts_avg \n opp_vote2_ts_avg \n opp_elo1_ts_avg \n opp_elo2_ts_avg \n diff_w1_tt_avg \n diff_wx_tt_avg \n diff_w2_tt_avg \n diff_ht1_tt_avg \n diff_ht2_tt_avg \n diff_ft1_tt_avg \n diff_ft2_tt_avg \n diff_ps_ht_tt_avg \n diff_ps_ft_tt_avg \n diff_vote1_tt_avg \n diff_votex_tt_avg \n diff_vote2_tt_avg \n diff_elo1_tt_avg \n diff_elo2_tt_avg \n diff_w1_ts_avg \n diff_wx_ts_avg \n diff_w2_ts_avg \n diff_ht1_ts_avg \n diff_ht2_ts_avg \n diff_ft1_ts_avg \n diff_ft2_ts_avg \n diff_ps_ht_ts_avg \n diff_ps_ft_ts_avg \n diff_vote1_ts_avg \n diff_votex_ts_avg \n diff_vote2_ts_avg \n diff_elo1_ts_avg \n diff_elo2_ts_avg \n diff_vote12 \n diff_elo \n \n \n \n \n 0 \n 1.00 \n 0.472463 \n 0.472463 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.571429 \n 0.000000 \n 0.500000 \n 0.000000 \n 0.823529 \n 0.782609 \n 0.858299 \n 0.162021 \n 0.104519 \n 1.000000 \n 0.635697 \n 0.708861 \n 0.189873 \n 0.101266 \n 0.211573 \n 0.041772 \n 0.292194 \n 0.094937 \n 0.650782 \n 0.624656 \n 0.708244 \n 
0.362246 \n 0.156602 \n 0.836029 \n 0.678962 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.500000 \n 0.000000 \n 0.500000 \n 0.000000 \n 0.777778 \n 0.785714 \n 0.861679 \n 0.162021 \n 0.097464 \n 0.995360 \n 0.635697 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.400000 \n 0.166667 \n 0.375000 \n 0.333333 \n 0.428571 \n 0.332264 \n 0.305874 \n 0.560808 \n 0.892168 \n 0.931487 \n 0.645570 \n 0.405063 \n 0.449367 \n 0.709916 \n 0.476503 \n 0.624684 \n 0.442814 \n 0.640587 \n 0.618465 \n 0.598509 \n 0.322713 \n 0.469866 \n 0.647809 \n 0.455834 \n 1.000000 \n 0.500000 \n 0.000000 \n 0.750000 \n 0.300000 \n 0.666667 \n 0.306452 \n 0.833333 \n 0.750000 \n 0.785620 \n 0.368704 \n 0.246279 \n 0.596018 \n 0.186188 \n 0.725685 \n 0.500000 \n \n \n 1 \n 0.75 \n 0.628003 \n 0.657831 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.333333 \n 0.000000 \n 0.588235 \n 0.695652 \n 0.085197 \n 0.391887 \n 0.799607 \n 0.464370 \n 0.471198 \n 0.442748 \n 0.282443 \n 0.274809 \n 0.196292 \n 0.103817 \n 0.258270 \n 0.213740 \n 0.607993 \n 0.544972 \n 0.475809 \n 0.485643 \n 0.336685 \n 0.648486 \n 0.676920 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.333333 \n 0.000000 \n 0.555556 \n 0.714286 \n 0.107016 \n 0.391887 \n 0.745632 \n 0.470917 \n 0.471198 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.333333 \n 0.000000 \n 0.555556 \n 0.714286 \n 0.122613 \n 0.410600 \n 0.720589 \n 0.470917 \n 0.471198 \n 0.778626 \n 0.358779 \n 0.362595 \n 0.385496 \n 0.441603 \n 0.545038 \n 0.371251 \n 0.483921 \n 0.613010 \n 0.243569 \n 0.416984 \n 0.767833 \n 0.334032 \n 0.289995 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.491586 \n 0.482920 \n 0.513713 \n 0.500000 \n 0.500000 \n 0.626219 \n 0.472435 \n \n \n 2 \n 1.00 \n 0.811409 \n 0.523643 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.857143 \n 0.000000 \n 1.000000 \n 0.166667 \n 0.941176 \n 0.956522 \n 0.874874 \n 0.211592 \n 
0.060606 \n 0.464370 \n 0.471198 \n 0.408333 \n 0.266667 \n 0.325000 \n 0.180952 \n 0.120000 \n 0.229167 \n 0.225000 \n 0.592157 \n 0.523913 \n 0.424758 \n 0.522580 \n 0.371003 \n 0.468630 \n 0.516784 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.750000 \n 0.000000 \n 1.000000 \n 0.125000 \n 0.888889 \n 0.928571 \n 0.877859 \n 0.211592 \n 0.056515 \n 0.470917 \n 0.471198 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.600000 \n 0.166667 \n 0.750000 \n 0.222222 \n 0.214286 \n 0.065924 \n 0.211592 \n 0.876629 \n 0.470917 \n 0.471198 \n 0.795833 \n 0.366667 \n 0.337500 \n 0.894444 \n 0.432500 \n 0.962500 \n 0.464862 \n 0.784043 \n 0.824457 \n 0.795494 \n 0.224639 \n 0.320414 \n 0.496160 \n 0.453465 \n 1.000000 \n 0.500000 \n 0.000000 \n 0.875000 \n 0.200000 \n 0.916667 \n 0.177419 \n 1.000000 \n 1.000000 \n 0.938040 \n 0.500000 \n 0.050917 \n 0.500000 \n 0.500000 \n 0.878608 \n 0.765927 \n \n \n 3 \n 1.00 \n 0.756613 \n 0.697733 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.571429 \n 0.000000 \n 0.666667 \n 0.000000 \n 0.823529 \n 0.869565 \n 0.858765 \n 0.293601 \n 0.033987 \n 0.464370 \n 0.471198 \n 0.379032 \n 0.274194 \n 0.346774 \n 0.168203 \n 0.087097 \n 0.206989 \n 0.181452 \n 0.606262 \n 0.535063 \n 0.489853 \n 0.558270 \n 0.283177 \n 0.581739 \n 0.598190 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.500000 \n 0.000000 \n 0.666667 \n 0.000000 \n 0.777778 \n 0.857143 \n 0.862134 \n 0.293601 \n 0.031693 \n 0.470917 \n 0.471198 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.500000 \n 0.000000 \n 0.333333 \n 0.375000 \n 0.777778 \n 0.500000 \n 0.674786 \n 0.450061 \n 0.143268 \n 0.470917 \n 0.471198 \n 0.810484 \n 0.362903 \n 0.326613 \n 0.735215 \n 0.451008 \n 0.775806 \n 0.390700 \n 0.676819 \n 0.750877 \n 0.742185 \n 0.265652 \n 0.355826 \n 0.394199 \n 0.370364 \n 1.000000 \n 0.500000 \n 0.000000 \n 0.500000 \n 0.500000 \n 0.666667 \n 0.306452 \n 0.500000 \n 0.750000 \n 0.601075 \n 0.357198 \n 0.438903 \n 0.500000 \n 0.500000 \n 0.810986 \n 0.554411 \n \n \n 4 \n 0.50 \n 
0.472463 \n 0.472463 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.166667 \n 0.588235 \n 0.434783 \n 0.121502 \n 0.220921 \n 0.852222 \n 0.464370 \n 0.471198 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.095238 \n 0.133333 \n 0.055556 \n 0.277778 \n 0.549020 \n 0.405797 \n 0.245585 \n 0.347700 \n 0.653541 \n 0.464370 \n 0.471198 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.125000 \n 0.555556 \n 0.500000 \n 0.142456 \n 0.220921 \n 0.794695 \n 0.470917 \n 0.471198 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.200000 \n 0.000000 \n 0.250000 \n 0.444444 \n 0.428571 \n 0.062467 \n 0.153514 \n 0.908948 \n 0.470917 \n 0.471198 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.444444 \n 0.425000 \n 0.466667 \n 0.433071 \n 0.531915 \n 0.521739 \n 0.418542 \n 0.387746 \n 0.614952 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.500000 \n 0.400000 \n 0.500000 \n 0.435484 \n 0.583333 \n 0.550000 \n 0.543154 \n 0.561524 \n 0.437437 \n 0.500000 \n 0.500000 \n 0.699136 \n 0.500000 \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... 
\n \n \n 130135 \n 0.00 \n 0.472463 \n 0.472463 \n 0.300000 \n 0.208333 \n 0.491667 \n 0.121429 \n 0.130000 \n 0.161111 \n 0.236111 \n 0.561765 \n 0.482609 \n 0.230542 \n 0.609309 \n 0.530193 \n 0.355792 \n 0.456196 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.107143 \n 0.075000 \n 0.187500 \n 0.208333 \n 0.588235 \n 0.510870 \n 0.433433 \n 0.623915 \n 0.307892 \n 0.464370 \n 0.471198 \n 0.283333 \n 0.216667 \n 0.500000 \n 0.091667 \n 0.123333 \n 0.144444 \n 0.170833 \n 0.527778 \n 0.535714 \n 0.136381 \n 0.592159 \n 0.616564 \n 0.365543 \n 0.450849 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.093750 \n 0.075000 \n 0.187500 \n 0.156250 \n 0.555556 \n 0.562500 \n 0.446946 \n 0.623915 \n 0.287108 \n 0.470917 \n 0.471198 \n 0.525000 \n 0.479167 \n 0.495833 \n 0.508333 \n 0.530937 \n 0.484167 \n 0.516732 \n 0.478457 \n 0.478804 \n 0.366806 \n 0.487068 \n 0.628617 \n 0.402124 \n 0.484687 \n 0.516667 \n 0.483333 \n 0.500000 \n 0.498958 \n 0.524167 \n 0.478472 \n 0.507527 \n 0.479167 \n 0.481250 \n 0.332450 \n 0.471017 \n 0.680405 \n 0.401952 \n 0.478411 \n 0.706570 \n 0.500000 \n \n \n 130136 \n 0.00 \n 0.472463 \n 0.472463 \n 0.303030 \n 0.262626 \n 0.434343 \n 0.144300 \n 0.131313 \n 0.180135 \n 0.232323 \n 0.570410 \n 0.494510 \n 0.284858 \n 0.538426 \n 0.510493 \n 0.409076 \n 0.472245 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.095238 \n 0.233333 \n 0.208333 \n 0.347222 \n 0.490196 \n 0.449275 \n 0.200614 \n 0.665290 \n 0.532040 \n 0.464370 \n 0.471198 \n 0.224490 \n 0.285714 \n 0.489796 \n 0.096939 \n 0.163265 \n 0.156463 \n 0.196429 \n 0.507937 \n 0.526239 \n 0.160630 \n 0.538090 \n 0.618908 \n 0.419868 \n 0.474626 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.083333 \n 0.233333 \n 0.208333 \n 0.260417 \n 0.462963 \n 0.511905 \n 0.219680 \n 0.665290 \n 0.496126 \n 0.470917 \n 0.471198 \n 0.526515 \n 0.506313 \n 0.467172 \n 0.528620 \n 0.442614 \n 0.483081 \n 0.430789 \n 0.565280 \n 0.533926 \n 0.555305 \n 0.387670 \n 0.487533 \n 0.450157 \n 0.501069 \n 0.487245 \n 0.517857 \n 
0.494898 \n 0.506803 \n 0.464966 \n 0.474065 \n 0.466974 \n 0.533730 \n 0.510034 \n 0.468142 \n 0.383904 \n 0.567234 \n 0.452500 \n 0.503638 \n 0.458511 \n 0.500000 \n \n \n 130137 \n 0.00 \n 0.472463 \n 0.472463 \n 0.272727 \n 0.227273 \n 0.500000 \n 0.103896 \n 0.127273 \n 0.166667 \n 0.242424 \n 0.556150 \n 0.482213 \n 0.201240 \n 0.682482 \n 0.522227 \n 0.464370 \n 0.471198 \n 0.714286 \n 0.142857 \n 0.142857 \n 0.163265 \n 0.057143 \n 0.238095 \n 0.166667 \n 0.621849 \n 0.559006 \n 0.490831 \n 0.714909 \n 0.198766 \n 0.464370 \n 0.471198 \n 0.333333 \n 0.111111 \n 0.555556 \n 0.166667 \n 0.088889 \n 0.166667 \n 0.194444 \n 0.580247 \n 0.531746 \n 0.083622 \n 0.639458 \n 0.646376 \n 0.470917 \n 0.471198 \n 0.714286 \n 0.142857 \n 0.142857 \n 0.142857 \n 0.057143 \n 0.238095 \n 0.125000 \n 0.587302 \n 0.602041 \n 0.502975 \n 0.714909 \n 0.185349 \n 0.470917 \n 0.471198 \n 0.279221 \n 0.542208 \n 0.678571 \n 0.465368 \n 0.539448 \n 0.457143 \n 0.545634 \n 0.446532 \n 0.442405 \n 0.309888 \n 0.471288 \n 0.687145 \n 0.500000 \n 0.500000 \n 0.309524 \n 0.484127 \n 0.706349 \n 0.511905 \n 0.515873 \n 0.464286 \n 0.535842 \n 0.494709 \n 0.450794 \n 0.273758 \n 0.431135 \n 0.752452 \n 0.500000 \n 0.500000 \n 0.215050 \n 0.500000 \n \n \n 130138 \n 0.00 \n 0.269385 \n 0.329496 \n 0.273684 \n 0.315789 \n 0.410526 \n 0.123308 \n 0.103158 \n 0.170175 \n 0.226316 \n 0.578328 \n 0.492449 \n 0.238697 \n 0.657029 \n 0.496170 \n 0.247452 \n 0.328089 \n 0.500000 \n 0.204545 \n 0.295455 \n 0.207792 \n 0.095455 \n 0.285985 \n 0.176136 \n 0.617647 \n 0.579051 \n 0.532626 \n 0.543209 \n 0.245968 \n 0.385910 \n 0.368423 \n 0.191489 \n 0.340426 \n 0.468085 \n 0.085106 \n 0.140426 \n 0.156028 \n 0.196809 \n 0.515366 \n 0.525836 \n 0.123363 \n 0.638746 \n 0.606588 \n 0.256519 \n 0.330758 \n 0.500000 \n 0.204545 \n 0.295455 \n 0.181818 \n 0.095455 \n 0.285985 \n 0.132102 \n 0.583333 \n 0.618506 \n 0.543774 \n 0.543209 \n 0.229365 \n 0.394096 \n 0.368423 \n 0.386842 \n 0.555622 \n 
0.557536 \n 0.450718 \n 0.504333 \n 0.430514 \n 0.530226 \n 0.468001 \n 0.435048 \n 0.307040 \n 0.600781 \n 0.644760 \n 0.375189 \n 0.458826 \n 0.345745 \n 0.567940 \n 0.586315 \n 0.451644 \n 0.522485 \n 0.435022 \n 0.533397 \n 0.449025 \n 0.435131 \n 0.273188 \n 0.587198 \n 0.706563 \n 0.371987 \n 0.460040 \n 0.213806 \n 0.444451 \n \n \n 130139 \n 0.50 \n 0.639889 \n 0.401371 \n 0.525316 \n 0.215190 \n 0.259494 \n 0.251356 \n 0.103797 \n 0.297468 \n 0.191983 \n 0.630678 \n 0.576775 \n 0.591648 \n 0.444651 \n 0.236022 \n 0.419905 \n 0.396232 \n 0.433333 \n 0.133333 \n 0.433333 \n 0.171429 \n 0.173333 \n 0.216667 \n 0.261111 \n 0.556863 \n 0.498551 \n 0.379169 \n 0.491921 \n 0.435527 \n 0.388752 \n 0.427319 \n 0.486486 \n 0.243243 \n 0.270270 \n 0.229730 \n 0.111712 \n 0.295796 \n 0.152027 \n 0.595596 \n 0.611326 \n 0.596398 \n 0.451718 \n 0.221622 \n 0.416652 \n 0.358518 \n 0.433333 \n 0.133333 \n 0.433333 \n 0.150000 \n 0.173333 \n 0.216667 \n 0.195833 \n 0.525926 \n 0.552381 \n 0.393976 \n 0.491921 \n 0.406128 \n 0.396879 \n 0.427319 \n 0.545992 \n 0.540928 \n 0.413080 \n 0.546624 \n 0.460886 \n 0.548481 \n 0.458360 \n 0.560073 \n 0.558668 \n 0.639490 \n 0.458146 \n 0.384572 \n 0.528082 \n 0.468265 \n 0.526577 \n 0.554955 \n 0.418468 \n 0.539865 \n 0.469189 \n 0.539565 \n 0.477390 \n 0.552252 \n 0.541261 \n 0.609207 \n 0.463307 \n 0.398967 \n 0.518398 \n 0.427007 \n 0.766734 \n 0.720416 \n \n \n
\n
130140 rows × 89 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 88
+ }
+ ],
+ "source": [
+ "scaler=MinMaxScaler()\n",
+ "nums=scaler.fit_transform(df[df.columns[25:]].values)\n",
+ "nums_df=pd.DataFrame(nums, columns=df.columns[25:])\n",
+ "df.reset_index(drop=True, inplace=True)\n",
+ "df=pd.concat([df[df.columns[:25]],nums_df], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_info=df[COL_INF]\n",
+ "labels=df[['w1', 'wx', 'w2']].values\n",
+ "\n",
+ "encoder = OneHotEncoder()\n",
+ "countries=encoder.fit_transform(df[['country_id']]).toarray()\n",
+ "encoder = OneHotEncoder()\n",
+ "form1=encoder.fit_transform(df[['form1']]).toarray()\n",
+ "encoder = OneHotEncoder()\n",
+ "form2=encoder.fit_transform(df[['form2']]).toarray()\n",
+ "side=df[['side']].values\n",
+ "\n",
+ "data=np.hstack([nums,countries,form1,form2,side])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "((130140, 187), (130140, 3))"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 108
+ }
+ ],
+ "source": [
+ "data.shape,labels.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(130140, 187)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 103
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "tt_avg tar_ht1_tt_avg tar_ht2_tt_avg \\\n",
+ "73 0.000000 0.000000 2.000000 0.000000 \n",
+ "115 0.000000 0.000000 0.000000 0.000000 \n",
+ "143 0.000000 0.000000 3.000000 0.000000 \n",
+ "145 0.000000 0.000000 2.000000 0.000000 \n",
+ "149 0.000000 1.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.208333 0.491667 0.425000 0.650000 \n",
+ "135576 0.262626 0.434343 0.505051 0.656566 \n",
+ "135577 0.227273 0.500000 0.363636 0.636364 \n",
+ "135578 0.315789 0.410526 0.431579 0.515789 \n",
+ "135579 0.215190 0.259494 0.879747 0.518987 \n",
+ "\n",
+ " tar_ft1_tt_avg tar_ft2_tt_avg tar_ps_ht_tt_avg tar_ps_ft_tt_avg \\\n",
+ "73 3.000000 0.000000 2.000000 3.000000 \n",
+ "115 2.000000 0.000000 0.000000 2.000000 \n",
+ "143 6.000000 1.000000 3.000000 5.000000 \n",
+ "145 4.000000 0.000000 2.000000 4.000000 \n",
+ "149 0.000000 1.000000 0.000000 -1.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.966667 1.416667 -0.225000 -0.450000 \n",
+ "135576 1.080808 1.393939 -0.151515 -0.313131 \n",
+ "135577 1.000000 1.454545 -0.272727 -0.454545 \n",
+ "135578 1.021053 1.357895 -0.084211 -0.336842 \n",
+ "135579 1.784810 1.151899 0.360759 0.632911 \n",
+ "\n",
+ " tar_vote1_tt_avg tar_votex_tt_avg tar_vote2_tt_avg tar_elo1_tt_avg \\\n",
+ "73 0.800165 0.070749 0.129086 2071.864258 \n",
+ "115 0.129576 0.171123 0.699301 1512.538486 \n",
+ "143 0.814543 0.092395 0.093062 1512.538486 \n",
+ "145 0.800570 0.128205 0.071225 1512.538486 \n",
+ "149 0.161068 0.096469 0.742463 1512.538486 \n",
+ "... ... ... ... ... \n",
+ "135575 0.255649 0.266064 0.478287 1399.157367 \n",
+ "135576 0.302763 0.235111 0.462126 1454.798840 \n",
+ "135577 0.230233 0.298016 0.471752 1512.538486 \n",
+ "135578 0.262723 0.286901 0.450376 1286.024429 \n",
+ "135579 0.568873 0.194163 0.236964 1466.106194 \n",
+ "\n",
+ " tar_elo2_tt_avg opp_w1_tt_avg opp_wx_tt_avg opp_w2_tt_avg \\\n",
+ "73 1687.874878 0.708861 0.189873 0.101266 \n",
+ "115 1512.538486 0.442748 0.282443 0.274809 \n",
+ "143 1512.538486 0.408333 0.266667 0.325000 \n",
+ "145 1512.538486 0.379032 0.274194 0.346774 \n",
+ "149 1512.538486 0.000000 0.000000 1.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 1496.549027 0.250000 0.250000 0.500000 \n",
+ "135576 1513.654585 0.250000 0.250000 0.500000 \n",
+ "135577 1512.538486 0.714286 0.142857 0.142857 \n",
+ "135578 1360.001905 0.500000 0.204545 0.295455 \n",
+ "135579 1432.634170 0.433333 0.133333 0.433333 \n",
+ "\n",
+ " opp_ht1_tt_avg opp_ht2_tt_avg opp_ft1_tt_avg opp_ft2_tt_avg \\\n",
+ "73 0.740506 0.208861 1.753165 0.569620 \n",
+ "115 0.687023 0.519084 1.549618 1.282443 \n",
+ "143 0.633333 0.600000 1.375000 1.350000 \n",
+ "145 0.588710 0.435484 1.241935 1.088710 \n",
+ "149 0.333333 0.666667 0.333333 1.666667 \n",
+ "... ... ... ... ... \n",
+ "135575 0.375000 0.375000 1.125000 1.250000 \n",
+ "135576 0.333333 1.166667 1.250000 2.083333 \n",
+ "135577 0.571429 0.285714 1.428571 1.000000 \n",
+ "135578 0.727273 0.477273 1.715909 1.056818 \n",
+ "135579 0.600000 0.866667 1.300000 1.566667 \n",
+ "\n",
+ " opp_ps_ht_tt_avg opp_ps_ft_tt_avg opp_vote1_tt_avg \\\n",
+ "73 0.531646 1.183544 0.670008 \n",
+ "115 0.167939 0.267176 0.468394 \n",
+ "143 0.033333 0.025000 0.424112 \n",
+ "145 0.153226 0.153226 0.480576 \n",
+ "149 -0.333333 -1.333333 0.268697 \n",
+ "... ... ... ... \n",
+ "135575 0.000000 -0.125000 0.431636 \n",
+ "135576 -0.833333 -0.833333 0.229689 \n",
+ "135577 0.285714 0.428571 0.481423 \n",
+ "135578 0.250000 0.659091 0.517677 \n",
+ "135579 -0.266667 -0.266667 0.384568 \n",
+ "\n",
+ " opp_votex_tt_avg opp_vote2_tt_avg opp_elo1_tt_avg opp_elo2_tt_avg \\\n",
+ "73 0.158180 0.171812 1900.639220 1733.990657 \n",
+ "115 0.212063 0.319543 1704.799281 1731.814017 \n",
+ "143 0.228192 0.347695 1516.987152 1561.128075 \n",
+ "145 0.243777 0.275648 1635.100106 1647.897169 \n",
+ "149 0.151828 0.579475 1512.538486 1512.538486 \n",
+ "... ... ... ... ... \n",
+ "135575 0.272441 0.295922 1512.538486 1512.538486 \n",
+ "135576 0.290508 0.479802 1512.538486 1512.538486 \n",
+ "135577 0.312175 0.206401 1512.538486 1512.538486 \n",
+ "135578 0.237200 0.245123 1430.607804 1402.993755 \n",
+ "135579 0.214804 0.400628 1433.575904 1465.769550 \n",
+ "\n",
+ " tar_w1_ts_avg tar_wx_ts_avg tar_w2_ts_avg tar_ht1_ts_avg \\\n",
+ "73 1.000000 0.000000 0.000000 2.000000 \n",
+ "115 1.000000 0.000000 0.000000 0.000000 \n",
+ "143 1.000000 0.000000 0.000000 3.000000 \n",
+ "145 1.000000 0.000000 0.000000 2.000000 \n",
+ "149 0.000000 0.000000 1.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.283333 0.216667 0.500000 0.366667 \n",
+ "135576 0.224490 0.285714 0.489796 0.387755 \n",
+ "135577 0.333333 0.111111 0.555556 0.666667 \n",
+ "135578 0.191489 0.340426 0.468085 0.340426 \n",
+ "135579 0.486486 0.243243 0.270270 0.918919 \n",
+ "\n",
+ " tar_ht2_ts_avg tar_ft1_ts_avg tar_ft2_ts_avg tar_ps_ht_ts_avg \\\n",
+ "73 0.000000 3.000000 0.000000 2.000000 \n",
+ "115 0.000000 2.000000 0.000000 0.000000 \n",
+ "143 0.000000 6.000000 1.000000 3.000000 \n",
+ "145 0.000000 4.000000 0.000000 2.000000 \n",
+ "149 0.000000 0.000000 1.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.616667 0.866667 1.366667 -0.250000 \n",
+ "135576 0.816327 0.938776 1.571429 -0.428571 \n",
+ "135577 0.444444 1.000000 1.555556 0.222222 \n",
+ "135578 0.702128 0.936170 1.574468 -0.361702 \n",
+ "135579 0.558559 1.774775 1.216216 0.360360 \n",
+ "\n",
+ " tar_ps_ft_ts_avg tar_vote1_ts_avg tar_votex_ts_avg \\\n",
+ "73 3.000000 0.800165 0.070749 \n",
+ "115 2.000000 0.129576 0.171123 \n",
+ "143 5.000000 0.814543 0.092395 \n",
+ "145 4.000000 0.800570 0.128205 \n",
+ "149 -1.000000 0.161068 0.096469 \n",
+ "... ... ... ... \n",
+ "135575 -0.500000 0.155670 0.258575 \n",
+ "135576 -0.632653 0.177217 0.234965 \n",
+ "135577 -0.555556 0.108789 0.279229 \n",
+ "135578 -0.638298 0.144103 0.278918 \n",
+ "135579 0.558559 0.564439 0.197249 \n",
+ "\n",
+ " tar_vote2_ts_avg tar_elo1_ts_avg tar_elo2_ts_avg opp_w1_ts_avg \\\n",
+ "73 0.129086 2071.864258 1687.874878 0.000000 \n",
+ "115 0.699301 1512.538486 1512.538486 1.000000 \n",
+ "143 0.093062 1512.538486 1512.538486 0.000000 \n",
+ "145 0.071225 1512.538486 1512.538486 0.000000 \n",
+ "149 0.742463 1512.538486 1512.538486 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.585755 1400.155820 1490.849094 0.250000 \n",
+ "135576 0.587818 1458.094223 1516.193138 0.250000 \n",
+ "135577 0.611983 1512.538486 1512.538486 0.714286 \n",
+ "135578 0.576980 1283.879600 1362.846871 0.500000 \n",
+ "135579 0.238312 1454.664049 1392.435630 0.433333 \n",
+ "\n",
+ " opp_wx_ts_avg opp_w2_ts_avg opp_ht1_ts_avg opp_ht2_ts_avg \\\n",
+ "73 0.000000 1.000000 0.000000 2.000000 \n",
+ "115 0.000000 0.000000 0.000000 0.000000 \n",
+ "143 0.000000 1.000000 0.000000 3.000000 \n",
+ "145 0.000000 1.000000 2.000000 0.000000 \n",
+ "149 0.000000 1.000000 0.000000 1.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 0.250000 0.500000 0.375000 0.375000 \n",
+ "135576 0.250000 0.500000 0.333333 1.166667 \n",
+ "135577 0.142857 0.142857 0.571429 0.285714 \n",
+ "135578 0.204545 0.295455 0.727273 0.477273 \n",
+ "135579 0.133333 0.433333 0.600000 0.866667 \n",
+ "\n",
+ " opp_ft1_ts_avg opp_ft2_ts_avg opp_ps_ht_ts_avg opp_ps_ft_ts_avg \\\n",
+ "73 1.000000 3.000000 -2.000000 -2.000000 \n",
+ "115 2.000000 0.000000 0.000000 2.000000 \n",
+ "143 1.000000 6.000000 -3.000000 -5.000000 \n",
+ "145 2.000000 3.000000 2.000000 -1.000000 \n",
+ "149 0.000000 2.000000 -1.000000 -2.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 1.125000 1.250000 0.000000 -0.125000 \n",
+ "135576 1.250000 2.083333 -0.833333 -0.833333 \n",
+ "135577 1.428571 1.000000 0.285714 0.428571 \n",
+ "135578 1.715909 1.056818 0.250000 0.659091 \n",
+ "135579 1.300000 1.566667 -0.266667 -0.266667 \n",
+ "\n",
+ " opp_vote1_ts_avg opp_votex_ts_avg opp_vote2_ts_avg opp_elo1_ts_avg \\\n",
+ "73 0.329731 0.133564 0.536705 1961.808838 \n",
+ "115 0.143436 0.179294 0.677270 1512.538486 \n",
+ "143 0.093062 0.092395 0.814543 1512.538486 \n",
+ "145 0.634093 0.196526 0.169381 1512.538486 \n",
+ "149 0.089991 0.067034 0.842975 1512.538486 \n",
+ "... ... ... ... ... \n",
+ "135575 0.431636 0.272441 0.295922 1512.538486 \n",
+ "135576 0.229689 0.290508 0.479802 1512.538486 \n",
+ "135577 0.481423 0.312175 0.206401 1512.538486 \n",
+ "135578 0.517677 0.237200 0.245123 1430.607804 \n",
+ "135579 0.384568 0.214804 0.400628 1433.575904 \n",
+ "\n",
+ " opp_elo2_ts_avg diff_w1_tt_avg diff_wx_tt_avg diff_w2_tt_avg \\\n",
+ "73 2003.151367 0.291139 -0.189873 -0.101266 \n",
+ "115 1512.538486 0.557252 -0.282443 -0.274809 \n",
+ "143 1512.538486 0.591667 -0.266667 -0.325000 \n",
+ "145 1512.538486 0.620968 -0.274194 -0.346774 \n",
+ "149 1512.538486 0.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 1512.538486 0.050000 -0.041667 -0.008333 \n",
+ "135576 1512.538486 0.053030 0.012626 -0.065657 \n",
+ "135577 1512.538486 -0.441558 0.084416 0.357143 \n",
+ "135578 1402.993755 -0.226316 0.111244 0.115072 \n",
+ "135579 1465.769550 0.091983 0.081857 -0.173840 \n",
+ "\n",
+ " diff_ht1_tt_avg diff_ht2_tt_avg diff_ft1_tt_avg diff_ft2_tt_avg \\\n",
+ "73 1.259494 -0.208861 1.246835 -0.569620 \n",
+ "115 -0.687023 -0.519084 0.450382 -1.282443 \n",
+ "143 2.366667 -0.600000 4.625000 -0.350000 \n",
+ "145 1.411290 -0.435484 2.758065 -1.088710 \n",
+ "149 -0.333333 -0.666667 -0.333333 -0.666667 \n",
+ "... ... ... ... ... \n",
+ "135575 0.050000 0.275000 -0.158333 0.166667 \n",
+ "135576 0.171717 -0.510101 -0.169192 -0.689394 \n",
+ "135577 -0.207792 0.350649 -0.428571 0.454545 \n",
+ "135578 -0.295694 0.038517 -0.694856 0.301077 \n",
+ "135579 0.279747 -0.347679 0.484810 -0.414768 \n",
+ "\n",
+ " diff_ps_ht_tt_avg diff_ps_ft_tt_avg diff_vote1_tt_avg \\\n",
+ "73 1.468354 1.816456 0.130158 \n",
+ "115 -0.167939 1.732824 -0.338818 \n",
+ "143 2.966667 4.975000 0.390431 \n",
+ "145 1.846774 3.846774 0.319994 \n",
+ "149 0.333333 0.333333 -0.107629 \n",
+ "... ... ... ... \n",
+ "135575 -0.225000 -0.325000 -0.175987 \n",
+ "135576 0.681818 0.520202 0.073074 \n",
+ "135577 -0.558442 -0.883117 -0.251191 \n",
+ "135578 -0.334211 -0.995933 -0.254954 \n",
+ "135579 0.627426 0.899578 0.184305 \n",
+ "\n",
+ " diff_votex_tt_avg diff_vote2_tt_avg diff_elo1_tt_avg \\\n",
+ "73 -0.087431 -0.042727 1.712250e+02 \n",
+ "115 -0.040940 0.379758 -1.922608e+02 \n",
+ "143 -0.135797 -0.254633 -4.448666e+00 \n",
+ "145 -0.115572 -0.204422 -1.225616e+02 \n",
+ "149 -0.055360 0.162988 2.273737e-13 \n",
+ "... ... ... ... \n",
+ "135575 -0.006378 0.182365 -1.133811e+02 \n",
+ "135576 -0.055397 -0.017677 -5.773965e+01 \n",
+ "135577 -0.014160 0.265350 -6.821210e-13 \n",
+ "135578 0.049701 0.205253 -1.445834e+02 \n",
+ "135579 -0.020641 -0.163664 3.253029e+01 \n",
+ "\n",
+ " diff_elo2_tt_avg diff_w1_ts_avg diff_wx_ts_avg diff_w2_ts_avg \\\n",
+ "73 -4.611578e+01 1.000000 0.000000 -1.000000 \n",
+ "115 -2.192755e+02 0.000000 0.000000 0.000000 \n",
+ "143 -4.858959e+01 1.000000 0.000000 -1.000000 \n",
+ "145 -1.353587e+02 1.000000 0.000000 -1.000000 \n",
+ "149 2.273737e-13 0.000000 0.000000 0.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 -1.598946e+01 0.033333 -0.033333 0.000000 \n",
+ "135576 1.116099e+00 -0.025510 0.035714 -0.010204 \n",
+ "135577 -6.821210e-13 -0.380952 -0.031746 0.412698 \n",
+ "135578 -4.299185e+01 -0.308511 0.135880 0.172631 \n",
+ "135579 -3.313538e+01 0.053153 0.109910 -0.163063 \n",
+ "\n",
+ " diff_ht1_ts_avg diff_ht2_ts_avg diff_ft1_ts_avg diff_ft2_ts_avg \\\n",
+ "73 2.000000 -2.000000 2.000000 -3.000000 \n",
+ "115 0.000000 0.000000 0.000000 0.000000 \n",
+ "143 3.000000 -3.000000 5.000000 -5.000000 \n",
+ "145 0.000000 0.000000 2.000000 -3.000000 \n",
+ "149 0.000000 -1.000000 0.000000 -1.000000 \n",
+ "... ... ... ... ... \n",
+ "135575 -0.008333 0.241667 -0.258333 0.116667 \n",
+ "135576 0.054422 -0.350340 -0.311224 -0.511905 \n",
+ "135577 0.095238 0.158730 -0.428571 0.555556 \n",
+ "135578 -0.386847 0.224855 -0.779739 0.517650 \n",
+ "135579 0.318919 -0.308108 0.474775 -0.350450 \n",
+ "\n",
+ " diff_ps_ht_ts_avg diff_ps_ft_ts_avg diff_vote1_ts_avg \\\n",
+ "73 4.000000 5.000000 0.470435 \n",
+ "115 0.000000 0.000000 -0.013859 \n",
+ "143 6.000000 10.000000 0.721481 \n",
+ "145 0.000000 5.000000 0.166476 \n",
+ "149 1.000000 1.000000 0.071077 \n",
+ "... ... ... ... \n",
+ "135575 -0.250000 -0.375000 -0.275966 \n",
+ "135576 0.404762 0.200680 -0.052472 \n",
+ "135577 -0.063492 -0.984127 -0.372634 \n",
+ "135578 -0.611702 -1.297389 -0.373574 \n",
+ "135579 0.627027 0.825225 0.179871 \n",
+ "\n",
+ " diff_votex_ts_avg diff_vote2_ts_avg diff_elo1_ts_avg \\\n",
+ "73 -0.062816 -0.407619 110.055420 \n",
+ "115 -0.008171 0.022031 0.000000 \n",
+ "143 0.000000 -0.721481 0.000000 \n",
+ "145 -0.068320 -0.098156 0.000000 \n",
+ "149 0.029435 -0.100512 0.000000 \n",
+ "... ... ... ... \n",
+ "135575 -0.013866 0.289833 -112.382666 \n",
+ "135576 -0.055544 0.108016 -54.444263 \n",
+ "135577 -0.032947 0.405581 0.000000 \n",
+ "135578 0.041718 0.331856 -146.728203 \n",
+ "135579 -0.017555 -0.162316 21.088145 \n",
+ "\n",
+ " diff_elo2_ts_avg diff_vote12 diff_elo \n",
+ "73 -315.276489 0.437832 0.000000 \n",
+ "115 0.000000 0.244866 -33.351196 \n",
+ "143 0.000000 0.734504 321.749268 \n",
+ "145 0.000000 0.603317 65.832764 \n",
+ "149 0.000000 0.386328 0.000000 \n",
+ "... ... ... ... \n",
+ "135575 -21.689392 0.400749 0.000000 \n",
+ "135576 3.654652 -0.080490 0.000000 \n",
+ "135577 0.000000 -0.552807 0.000000 \n",
+ "135578 -40.146884 -0.555220 -67.209839 \n",
+ "135579 -73.333920 0.517467 266.684692 \n",
+ "\n",
+ "[130140 rows x 114 columns]"
+ ],
+ "text/html": "
\n\n
\n \n \n \n country \n liga \n mid \n round \n ds \n t1 \n t2 \n tid1 \n tid2 \n w1 \n wx \n w2 \n ft1 \n ft2 \n winner \n side \n country_id \n round \n ds \n de \n form1 \n form2 \n vote1 \n votex \n vote2 \n pop_r \n elo1 \n elo2 \n tar_w1_tt_avg \n tar_wx_tt_avg \n tar_w2_tt_avg \n tar_ht1_tt_avg \n tar_ht2_tt_avg \n tar_ft1_tt_avg \n tar_ft2_tt_avg \n tar_ps_ht_tt_avg \n tar_ps_ft_tt_avg \n tar_vote1_tt_avg \n tar_votex_tt_avg \n tar_vote2_tt_avg \n tar_elo1_tt_avg \n tar_elo2_tt_avg \n opp_w1_tt_avg \n opp_wx_tt_avg \n opp_w2_tt_avg \n opp_ht1_tt_avg \n opp_ht2_tt_avg \n opp_ft1_tt_avg \n opp_ft2_tt_avg \n opp_ps_ht_tt_avg \n opp_ps_ft_tt_avg \n opp_vote1_tt_avg \n opp_votex_tt_avg \n opp_vote2_tt_avg \n opp_elo1_tt_avg \n opp_elo2_tt_avg \n tar_w1_ts_avg \n tar_wx_ts_avg \n tar_w2_ts_avg \n tar_ht1_ts_avg \n tar_ht2_ts_avg \n tar_ft1_ts_avg \n tar_ft2_ts_avg \n tar_ps_ht_ts_avg \n tar_ps_ft_ts_avg \n tar_vote1_ts_avg \n tar_votex_ts_avg \n tar_vote2_ts_avg \n tar_elo1_ts_avg \n tar_elo2_ts_avg \n opp_w1_ts_avg \n opp_wx_ts_avg \n opp_w2_ts_avg \n opp_ht1_ts_avg \n opp_ht2_ts_avg \n opp_ft1_ts_avg \n opp_ft2_ts_avg \n opp_ps_ht_ts_avg \n opp_ps_ft_ts_avg \n opp_vote1_ts_avg \n opp_votex_ts_avg \n opp_vote2_ts_avg \n opp_elo1_ts_avg \n opp_elo2_ts_avg \n diff_w1_tt_avg \n diff_wx_tt_avg \n diff_w2_tt_avg \n diff_ht1_tt_avg \n diff_ht2_tt_avg \n diff_ft1_tt_avg \n diff_ft2_tt_avg \n diff_ps_ht_tt_avg \n diff_ps_ft_tt_avg \n diff_vote1_tt_avg \n diff_votex_tt_avg \n diff_vote2_tt_avg \n diff_elo1_tt_avg \n diff_elo2_tt_avg \n diff_w1_ts_avg \n diff_wx_ts_avg \n diff_w2_ts_avg \n diff_ht1_ts_avg \n diff_ht2_ts_avg \n diff_ft1_ts_avg \n diff_ft2_ts_avg \n diff_ps_ht_ts_avg \n diff_ps_ft_ts_avg \n diff_vote1_ts_avg \n diff_votex_ts_avg \n diff_vote2_ts_avg \n diff_elo1_ts_avg \n diff_elo2_ts_avg \n diff_vote12 \n diff_elo \n \n \n \n \n 73 \n spain \n copa-del-rey \n 6570261 \n 4 \n 2015-01-15 19:00:00+00:00 \n real madrid \n atletico madrid \n 9 \n 134 \n 0 \n 1 
\n 0 \n 2.0 \n 2.0 \n draw \n 1 \n 42 \n 4 \n 2015-01-15 19:00:00+00:00 \n 2015-01-15 \n 18 \n 20 \n 0.671697 \n 0.094438 \n 0.233865 \n 4 \n 1512.538486 \n 1512.538486 \n 1.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 3.000000 \n 0.000000 \n 2.000000 \n 3.000000 \n 0.800165 \n 0.070749 \n 0.129086 \n 2071.864258 \n 1687.874878 \n 0.708861 \n 0.189873 \n 0.101266 \n 0.740506 \n 0.208861 \n 1.753165 \n 0.569620 \n 0.531646 \n 1.183544 \n 0.670008 \n 0.158180 \n 0.171812 \n 1900.639220 \n 1733.990657 \n 1.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 3.000000 \n 0.000000 \n 2.000000 \n 3.000000 \n 0.800165 \n 0.070749 \n 0.129086 \n 2071.864258 \n 1687.874878 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 2.000000 \n 1.000000 \n 3.000000 \n -2.000000 \n -2.000000 \n 0.329731 \n 0.133564 \n 0.536705 \n 1961.808838 \n 2003.151367 \n 0.291139 \n -0.189873 \n -0.101266 \n 1.259494 \n -0.208861 \n 1.246835 \n -0.569620 \n 1.468354 \n 1.816456 \n 0.130158 \n -0.087431 \n -0.042727 \n 1.712250e+02 \n -4.611578e+01 \n 1.000000 \n 0.000000 \n -1.000000 \n 2.000000 \n -2.000000 \n 2.000000 \n -3.000000 \n 4.000000 \n 5.000000 \n 0.470435 \n -0.062816 \n -0.407619 \n 110.055420 \n -315.276489 \n 0.437832 \n 0.000000 \n \n \n 115 \n spain \n laliga \n 5764480 \n 19 \n 2015-01-17 21:00:00+00:00 \n espanyol \n celta vigo \n 59 \n 17 \n 1 \n 0 \n 0 \n 1.0 \n 0.0 \n home \n 1 \n 42 \n 19 \n 2015-01-17 21:00:00+00:00 \n 2015-01-17 \n 20 \n 7 \n 0.500000 \n 0.244866 \n 0.255134 \n 3 \n 1686.446167 \n 1719.797363 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.129576 \n 0.171123 \n 0.699301 \n 1512.538486 \n 1512.538486 \n 0.442748 \n 0.282443 \n 0.274809 \n 0.687023 \n 0.519084 \n 1.549618 \n 1.282443 \n 0.167939 \n 0.267176 \n 0.468394 \n 0.212063 \n 0.319543 \n 1704.799281 \n 1731.814017 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 0.000000 \n 
2.000000 \n 0.129576 \n 0.171123 \n 0.699301 \n 1512.538486 \n 1512.538486 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.143436 \n 0.179294 \n 0.677270 \n 1512.538486 \n 1512.538486 \n 0.557252 \n -0.282443 \n -0.274809 \n -0.687023 \n -0.519084 \n 0.450382 \n -1.282443 \n -0.167939 \n 1.732824 \n -0.338818 \n -0.040940 \n 0.379758 \n -1.922608e+02 \n -2.192755e+02 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n -0.013859 \n -0.008171 \n 0.022031 \n 0.000000 \n 0.000000 \n 0.244866 \n -33.351196 \n \n \n 143 \n italy \n serie-a \n 5786106 \n 19 \n 2015-01-18 19:45:00+00:00 \n juventus \n hellas verona \n 73 \n 33 \n 1 \n 0 \n 0 \n 4.0 \n 0.0 \n home \n 1 \n 24 \n 19 \n 2015-01-18 19:45:00+00:00 \n 2015-01-18 \n 16 \n 23 \n 0.826446 \n 0.081612 \n 0.091942 \n 4 \n 1891.511353 \n 1569.762085 \n 1.000000 \n 0.000000 \n 0.000000 \n 3.000000 \n 0.000000 \n 6.000000 \n 1.000000 \n 3.000000 \n 5.000000 \n 0.814543 \n 0.092395 \n 0.093062 \n 1512.538486 \n 1512.538486 \n 0.408333 \n 0.266667 \n 0.325000 \n 0.633333 \n 0.600000 \n 1.375000 \n 1.350000 \n 0.033333 \n 0.025000 \n 0.424112 \n 0.228192 \n 0.347695 \n 1516.987152 \n 1561.128075 \n 1.000000 \n 0.000000 \n 0.000000 \n 3.000000 \n 0.000000 \n 6.000000 \n 1.000000 \n 3.000000 \n 5.000000 \n 0.814543 \n 0.092395 \n 0.093062 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 3.000000 \n 1.000000 \n 6.000000 \n -3.000000 \n -5.000000 \n 0.093062 \n 0.092395 \n 0.814543 \n 1512.538486 \n 1512.538486 \n 0.591667 \n -0.266667 \n -0.325000 \n 2.366667 \n -0.600000 \n 4.625000 \n -0.350000 \n 2.966667 \n 4.975000 \n 0.390431 \n -0.135797 \n -0.254633 \n -4.448666e+00 \n -4.858959e+01 \n 1.000000 \n 0.000000 \n -1.000000 \n 3.000000 \n -3.000000 \n 5.000000 \n -5.000000 \n 6.000000 \n 10.000000 \n 0.721481 \n 0.000000 \n -0.721481 \n 0.000000 \n 0.000000 \n 0.734504 
\n 321.749268 \n \n \n 145 \n spain \n laliga \n 5764484 \n 19 \n 2015-01-18 20:00:00+00:00 \n sevilla \n malaga \n 71 \n 12 \n 1 \n 0 \n 0 \n 2.0 \n 0.0 \n home \n 1 \n 42 \n 19 \n 2015-01-18 20:00:00+00:00 \n 2015-01-18 \n 14 \n 14 \n 0.722546 \n 0.158225 \n 0.119229 \n 4 \n 1830.244507 \n 1764.411743 \n 1.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 4.000000 \n 0.000000 \n 2.000000 \n 4.000000 \n 0.800570 \n 0.128205 \n 0.071225 \n 1512.538486 \n 1512.538486 \n 0.379032 \n 0.274194 \n 0.346774 \n 0.588710 \n 0.435484 \n 1.241935 \n 1.088710 \n 0.153226 \n 0.153226 \n 0.480576 \n 0.243777 \n 0.275648 \n 1635.100106 \n 1647.897169 \n 1.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n 0.000000 \n 4.000000 \n 0.000000 \n 2.000000 \n 4.000000 \n 0.800570 \n 0.128205 \n 0.071225 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 2.000000 \n 0.000000 \n 2.000000 \n 3.000000 \n 2.000000 \n -1.000000 \n 0.634093 \n 0.196526 \n 0.169381 \n 1512.538486 \n 1512.538486 \n 0.620968 \n -0.274194 \n -0.346774 \n 1.411290 \n -0.435484 \n 2.758065 \n -1.088710 \n 1.846774 \n 3.846774 \n 0.319994 \n -0.115572 \n -0.204422 \n -1.225616e+02 \n -1.353587e+02 \n 1.000000 \n 0.000000 \n -1.000000 \n 0.000000 \n 0.000000 \n 2.000000 \n -3.000000 \n 0.000000 \n 5.000000 \n 0.166476 \n -0.068320 \n -0.098156 \n 0.000000 \n 0.000000 \n 0.603317 \n 65.832764 \n \n \n 149 \n asia \n afc-asian-cup-group-c \n 5252518 \n 3 \n 2015-01-19 09:00:00+00:00 \n qatar \n bahrain \n 72 \n 1376 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n away \n 1 \n 1 \n 3 \n 2015-01-19 09:00:00+00:00 \n 2015-01-19 \n 14 \n 20 \n 0.581876 \n 0.222576 \n 0.195548 \n 2 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n -1.000000 \n 0.161068 \n 0.096469 \n 0.742463 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.333333 \n 0.666667 \n 0.333333 \n 1.666667 \n -0.333333 \n -1.333333 \n 0.268697 \n 0.151828 \n 
0.579475 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n -1.000000 \n 0.161068 \n 0.096469 \n 0.742463 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 1.000000 \n 0.000000 \n 2.000000 \n -1.000000 \n -2.000000 \n 0.089991 \n 0.067034 \n 0.842975 \n 1512.538486 \n 1512.538486 \n 0.000000 \n 0.000000 \n 0.000000 \n -0.333333 \n -0.666667 \n -0.333333 \n -0.666667 \n 0.333333 \n 0.333333 \n -0.107629 \n -0.055360 \n 0.162988 \n 2.273737e-13 \n 2.273737e-13 \n 0.000000 \n 0.000000 \n 0.000000 \n 0.000000 \n -1.000000 \n 0.000000 \n -1.000000 \n 1.000000 \n 1.000000 \n 0.071077 \n 0.029435 \n -0.100512 \n 0.000000 \n 0.000000 \n 0.386328 \n 0.000000 \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... 
\n \n \n 135575 \n portugal \n segunda-liga \n 8956894 \n 15 \n 2021-01-10 15:00:00+00:00 \n feirense \n ud oliveirense \n 280 \n 283 \n 0 \n 0 \n 1 \n 0.0 \n 1.0 \n home \n 0 \n 33 \n 15 \n 2021-01-10 15:00:00+00:00 \n 2021-01-10 \n 18 \n 18 \n 0.561049 \n 0.278652 \n 0.160300 \n 0 \n 1512.538486 \n 1512.538486 \n 0.300000 \n 0.208333 \n 0.491667 \n 0.425000 \n 0.650000 \n 0.966667 \n 1.416667 \n -0.225000 \n -0.450000 \n 0.255649 \n 0.266064 \n 0.478287 \n 1399.157367 \n 1496.549027 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.375000 \n 0.375000 \n 1.125000 \n 1.250000 \n 0.000000 \n -0.125000 \n 0.431636 \n 0.272441 \n 0.295922 \n 1512.538486 \n 1512.538486 \n 0.283333 \n 0.216667 \n 0.500000 \n 0.366667 \n 0.616667 \n 0.866667 \n 1.366667 \n -0.250000 \n -0.500000 \n 0.155670 \n 0.258575 \n 0.585755 \n 1400.155820 \n 1490.849094 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.375000 \n 0.375000 \n 1.125000 \n 1.250000 \n 0.000000 \n -0.125000 \n 0.431636 \n 0.272441 \n 0.295922 \n 1512.538486 \n 1512.538486 \n 0.050000 \n -0.041667 \n -0.008333 \n 0.050000 \n 0.275000 \n -0.158333 \n 0.166667 \n -0.225000 \n -0.325000 \n -0.175987 \n -0.006378 \n 0.182365 \n -1.133811e+02 \n -1.598946e+01 \n 0.033333 \n -0.033333 \n 0.000000 \n -0.008333 \n 0.241667 \n -0.258333 \n 0.116667 \n -0.250000 \n -0.375000 \n -0.275966 \n -0.013866 \n 0.289833 \n -112.382666 \n -21.689392 \n 0.400749 \n 0.000000 \n \n \n 135576 \n portugal \n segunda-liga \n 8956856 \n 15 \n 2021-01-10 17:00:00+00:00 \n arouca \n casa pia \n 39 \n 1647 \n 0 \n 1 \n 0 \n 1.0 \n 1.0 \n draw \n 0 \n 33 \n 15 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 20 \n 20 \n 0.258530 \n 0.402450 \n 0.339020 \n 0 \n 1512.538486 \n 1512.538486 \n 0.303030 \n 0.262626 \n 0.434343 \n 0.505051 \n 0.656566 \n 1.080808 \n 1.393939 \n -0.151515 \n -0.313131 \n 0.302763 \n 0.235111 \n 0.462126 \n 1454.798840 \n 1513.654585 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.333333 \n 1.166667 \n 1.250000 \n 2.083333 \n -0.833333 \n -0.833333 \n 
0.229689 \n 0.290508 \n 0.479802 \n 1512.538486 \n 1512.538486 \n 0.224490 \n 0.285714 \n 0.489796 \n 0.387755 \n 0.816327 \n 0.938776 \n 1.571429 \n -0.428571 \n -0.632653 \n 0.177217 \n 0.234965 \n 0.587818 \n 1458.094223 \n 1516.193138 \n 0.250000 \n 0.250000 \n 0.500000 \n 0.333333 \n 1.166667 \n 1.250000 \n 2.083333 \n -0.833333 \n -0.833333 \n 0.229689 \n 0.290508 \n 0.479802 \n 1512.538486 \n 1512.538486 \n 0.053030 \n 0.012626 \n -0.065657 \n 0.171717 \n -0.510101 \n -0.169192 \n -0.689394 \n 0.681818 \n 0.520202 \n 0.073074 \n -0.055397 \n -0.017677 \n -5.773965e+01 \n 1.116099e+00 \n -0.025510 \n 0.035714 \n -0.010204 \n 0.054422 \n -0.350340 \n -0.311224 \n -0.511905 \n 0.404762 \n 0.200680 \n -0.052472 \n -0.055544 \n 0.108016 \n -54.444263 \n 3.654652 \n -0.080490 \n 0.000000 \n \n \n 135577 \n portugal \n segunda-liga \n 8956724 \n 15 \n 2021-01-10 17:00:00+00:00 \n cd cova da piedade \n fc vizela \n 1293 \n 1748 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n home \n 0 \n 33 \n 15 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 20 \n 18 \n 0.080214 \n 0.286765 \n 0.633021 \n 0 \n 1512.538486 \n 1512.538486 \n 0.272727 \n 0.227273 \n 0.500000 \n 0.363636 \n 0.636364 \n 1.000000 \n 1.454545 \n -0.272727 \n -0.454545 \n 0.230233 \n 0.298016 \n 0.471752 \n 1512.538486 \n 1512.538486 \n 0.714286 \n 0.142857 \n 0.142857 \n 0.571429 \n 0.285714 \n 1.428571 \n 1.000000 \n 0.285714 \n 0.428571 \n 0.481423 \n 0.312175 \n 0.206401 \n 1512.538486 \n 1512.538486 \n 0.333333 \n 0.111111 \n 0.555556 \n 0.666667 \n 0.444444 \n 1.000000 \n 1.555556 \n 0.222222 \n -0.555556 \n 0.108789 \n 0.279229 \n 0.611983 \n 1512.538486 \n 1512.538486 \n 0.714286 \n 0.142857 \n 0.142857 \n 0.571429 \n 0.285714 \n 1.428571 \n 1.000000 \n 0.285714 \n 0.428571 \n 0.481423 \n 0.312175 \n 0.206401 \n 1512.538486 \n 1512.538486 \n -0.441558 \n 0.084416 \n 0.357143 \n -0.207792 \n 0.350649 \n -0.428571 \n 0.454545 \n -0.558442 \n -0.883117 \n -0.251191 \n -0.014160 \n 0.265350 \n -6.821210e-13 \n 
-6.821210e-13 \n -0.380952 \n -0.031746 \n 0.412698 \n 0.095238 \n 0.158730 \n -0.428571 \n 0.555556 \n -0.063492 \n -0.984127 \n -0.372634 \n -0.032947 \n 0.405581 \n 0.000000 \n 0.000000 \n -0.552807 \n 0.000000 \n \n \n 135578 \n romania \n liga-i \n 9270007 \n 12 \n 2021-01-10 17:00:00+00:00 \n fc hermannstadt \n fc viitorul constanta \n 1499 \n 594 \n 0 \n 0 \n 1 \n 1.0 \n 2.0 \n home \n 0 \n 34 \n 12 \n 2021-01-10 17:00:00+00:00 \n 2021-01-10 \n 14 \n 18 \n 0.068589 \n 0.307603 \n 0.623808 \n 0 \n 1285.478027 \n 1352.687866 \n 0.273684 \n 0.315789 \n 0.410526 \n 0.431579 \n 0.515789 \n 1.021053 \n 1.357895 \n -0.084211 \n -0.336842 \n 0.262723 \n 0.286901 \n 0.450376 \n 1286.024429 \n 1360.001905 \n 0.500000 \n 0.204545 \n 0.295455 \n 0.727273 \n 0.477273 \n 1.715909 \n 1.056818 \n 0.250000 \n 0.659091 \n 0.517677 \n 0.237200 \n 0.245123 \n 1430.607804 \n 1402.993755 \n 0.191489 \n 0.340426 \n 0.468085 \n 0.340426 \n 0.702128 \n 0.936170 \n 1.574468 \n -0.361702 \n -0.638298 \n 0.144103 \n 0.278918 \n 0.576980 \n 1283.879600 \n 1362.846871 \n 0.500000 \n 0.204545 \n 0.295455 \n 0.727273 \n 0.477273 \n 1.715909 \n 1.056818 \n 0.250000 \n 0.659091 \n 0.517677 \n 0.237200 \n 0.245123 \n 1430.607804 \n 1402.993755 \n -0.226316 \n 0.111244 \n 0.115072 \n -0.295694 \n 0.038517 \n -0.694856 \n 0.301077 \n -0.334211 \n -0.995933 \n -0.254954 \n 0.049701 \n 0.205253 \n -1.445834e+02 \n -4.299185e+01 \n -0.308511 \n 0.135880 \n 0.172631 \n -0.386847 \n 0.224855 \n -0.779739 \n 0.517650 \n -0.611702 \n -1.297389 \n -0.373574 \n 0.041718 \n 0.331856 \n -146.728203 \n -40.146884 \n -0.555220 \n -67.209839 \n \n \n 135579 \n scotland \n premiership \n 8736289 \n 23 \n 2021-01-10 15:00:00+00:00 \n rangers \n aberdeen \n 306 \n 685 \n 1 \n 0 \n 0 \n 2.0 \n 1.0 \n away \n 0 \n 36 \n 23 \n 2021-01-10 15:00:00+00:00 \n 2021-01-10 \n 18 \n 5 \n 0.647623 \n 0.222222 \n 0.130155 \n 2 \n 1699.736084 \n 1433.051392 \n 0.525316 \n 0.215190 \n 0.259494 \n 0.879747 \n 0.518987 \n 
1.784810 \n 1.151899 \n 0.360759 \n 0.632911 \n 0.568873 \n 0.194163 \n 0.236964 \n 1466.106194 \n 1432.634170 \n 0.433333 \n 0.133333 \n 0.433333 \n 0.600000 \n 0.866667 \n 1.300000 \n 1.566667 \n -0.266667 \n -0.266667 \n 0.384568 \n 0.214804 \n 0.400628 \n 1433.575904 \n 1465.769550 \n 0.486486 \n 0.243243 \n 0.270270 \n 0.918919 \n 0.558559 \n 1.774775 \n 1.216216 \n 0.360360 \n 0.558559 \n 0.564439 \n 0.197249 \n 0.238312 \n 1454.664049 \n 1392.435630 \n 0.433333 \n 0.133333 \n 0.433333 \n 0.600000 \n 0.866667 \n 1.300000 \n 1.566667 \n -0.266667 \n -0.266667 \n 0.384568 \n 0.214804 \n 0.400628 \n 1433.575904 \n 1465.769550 \n 0.091983 \n 0.081857 \n -0.173840 \n 0.279747 \n -0.347679 \n 0.484810 \n -0.414768 \n 0.627426 \n 0.899578 \n 0.184305 \n -0.020641 \n -0.163664 \n 3.253029e+01 \n -3.313538e+01 \n 0.053153 \n 0.109910 \n -0.163063 \n 0.318919 \n -0.308108 \n 0.474775 \n -0.350450 \n 0.627027 \n 0.825225 \n 0.179871 \n -0.017555 \n -0.162316 \n 21.088145 \n -73.333920 \n 0.517467 \n 266.684692 \n \n \n
\n
130140 rows × 114 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 81
+ }
+ ],
+ "source": [
+ "COL_NUM=['pop_r', 'elo1', 'elo2']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " country liga mid round ds \\\n",
+ "67450 greece super-league 9197411 11 2020-12-05 17:30:00+00:00 \n",
+ "135240 greece super-league 9197411 11 2020-12-05 17:30:00+00:00 \n",
+ "\n",
+ " t1 t2 tid1 tid2 w1 wx w2 ft1 ft2 winner side \\\n",
+ "67450 olympiacos volos nfc 319 1653 1 0 0 4.0 1.0 home 1 \n",
+ "135240 volos nfc olympiacos 1653 319 0 0 1 1.0 4.0 home 0 \n",
+ "\n",
+ " country_id round ds de form1 \\\n",
+ "67450 21 11 2020-12-05 17:30:00+00:00 2020-12-05 14 \n",
+ "135240 21 11 2020-12-05 17:30:00+00:00 2020-12-05 14 \n",
+ "\n",
+ " form2 vote1 votex vote2 pop_r elo1 elo2 \\\n",
+ "67450 14 0.776807 0.169688 0.053505 2 1686.436646 1252.658447 \n",
+ "135240 14 0.053505 0.169688 0.776807 2 1252.658447 1686.436646 \n",
+ "\n",
+ " tid_x home_w1_tt_avg home_wx_tt_avg home_w2_tt_avg \\\n",
+ "67450 319 0.750000 0.132812 0.117188 \n",
+ "135240 1653 0.270833 0.291667 0.437500 \n",
+ "\n",
+ " home_ht1_tt_avg home_ht2_tt_avg home_ft1_tt_avg home_ft2_tt_avg \\\n",
+ "67450 0.9375 0.234375 2.3125 0.609375 \n",
+ "135240 0.3125 0.666667 0.9375 1.500000 \n",
+ "\n",
+ " home_ps_ht_tt_avg home_ps_ft_tt_avg home_vote1_tt_avg \\\n",
+ "67450 0.703125 1.703125 0.660547 \n",
+ "135240 -0.354167 -0.562500 0.232241 \n",
+ "\n",
+ " home_votex_tt_avg home_vote2_tt_avg home_elo1_tt_avg \\\n",
+ "67450 0.160972 0.178481 1664.798659 \n",
+ "135240 0.277189 0.490570 1296.581658 \n",
+ "\n",
+ " home_elo2_tt_avg tid_y w1_th_avg wx_th_avg w2_th_avg ht1_th_avg \\\n",
+ "67450 1461.905214 319.0 0.75 0.132812 0.117188 0.9375 \n",
+ "135240 1412.341579 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " ht2_th_avg ft1_th_avg ft2_th_avg ps_ht_th_avg ps_ft_th_avg \\\n",
+ "67450 0.234375 2.3125 0.609375 0.703125 1.703125 \n",
+ "135240 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " vote1_th_avg votex_th_avg vote2_th_avg elo1_th_avg elo2_th_avg \\\n",
+ "67450 0.660547 0.160972 0.178481 1664.798659 1461.905214 \n",
+ "135240 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " tid_x away_w1_tt_avg away_wx_tt_avg away_w2_tt_avg \\\n",
+ "67450 1653 0.270833 0.291667 0.437500 \n",
+ "135240 319 0.750000 0.132812 0.117188 \n",
+ "\n",
+ " away_ht1_tt_avg away_ht2_tt_avg away_ft1_tt_avg away_ft2_tt_avg \\\n",
+ "67450 0.3125 0.666667 0.9375 1.500000 \n",
+ "135240 0.9375 0.234375 2.3125 0.609375 \n",
+ "\n",
+ " away_ps_ht_tt_avg away_ps_ft_tt_avg away_vote1_tt_avg \\\n",
+ "67450 -0.354167 -0.562500 0.232241 \n",
+ "135240 0.703125 1.703125 0.660547 \n",
+ "\n",
+ " away_votex_tt_avg away_vote2_tt_avg away_elo1_tt_avg \\\n",
+ "67450 0.277189 0.490570 1296.581658 \n",
+ "135240 0.160972 0.178481 1664.798659 \n",
+ "\n",
+ " away_elo2_tt_avg tid_y w1_ta_avg wx_ta_avg w2_ta_avg ht1_ta_avg \\\n",
+ "67450 1412.341579 1653.0 0.208333 0.291667 0.5 0.416667 \n",
+ "135240 1461.905214 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " ht2_ta_avg ft1_ta_avg ft2_ta_avg ps_ht_ta_avg ps_ft_ta_avg \\\n",
+ "67450 0.666667 1.0 1.791667 -0.25 -0.791667 \n",
+ "135240 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " vote1_ta_avg votex_ta_avg vote2_ta_avg elo1_ta_avg elo2_ta_avg \n",
+ "67450 0.122681 0.26322 0.614099 1300.265222 1419.646438 \n",
+ "135240 NaN NaN NaN NaN NaN "
+ ],
+ "text/html": "
\n\n
\n \n \n \n country \n liga \n mid \n round \n ds \n t1 \n t2 \n tid1 \n tid2 \n w1 \n wx \n w2 \n ft1 \n ft2 \n winner \n side \n country_id \n round \n ds \n de \n form1 \n form2 \n vote1 \n votex \n vote2 \n pop_r \n elo1 \n elo2 \n tid_x \n home_w1_tt_avg \n home_wx_tt_avg \n home_w2_tt_avg \n home_ht1_tt_avg \n home_ht2_tt_avg \n home_ft1_tt_avg \n home_ft2_tt_avg \n home_ps_ht_tt_avg \n home_ps_ft_tt_avg \n home_vote1_tt_avg \n home_votex_tt_avg \n home_vote2_tt_avg \n home_elo1_tt_avg \n home_elo2_tt_avg \n tid_y \n w1_th_avg \n wx_th_avg \n w2_th_avg \n ht1_th_avg \n ht2_th_avg \n ft1_th_avg \n ft2_th_avg \n ps_ht_th_avg \n ps_ft_th_avg \n vote1_th_avg \n votex_th_avg \n vote2_th_avg \n elo1_th_avg \n elo2_th_avg \n tid_x \n away_w1_tt_avg \n away_wx_tt_avg \n away_w2_tt_avg \n away_ht1_tt_avg \n away_ht2_tt_avg \n away_ft1_tt_avg \n away_ft2_tt_avg \n away_ps_ht_tt_avg \n away_ps_ft_tt_avg \n away_vote1_tt_avg \n away_votex_tt_avg \n away_vote2_tt_avg \n away_elo1_tt_avg \n away_elo2_tt_avg \n tid_y \n w1_ta_avg \n wx_ta_avg \n w2_ta_avg \n ht1_ta_avg \n ht2_ta_avg \n ft1_ta_avg \n ft2_ta_avg \n ps_ht_ta_avg \n ps_ft_ta_avg \n vote1_ta_avg \n votex_ta_avg \n vote2_ta_avg \n elo1_ta_avg \n elo2_ta_avg \n \n \n \n \n 67450 \n greece \n super-league \n 9197411 \n 11 \n 2020-12-05 17:30:00+00:00 \n olympiacos \n volos nfc \n 319 \n 1653 \n 1 \n 0 \n 0 \n 4.0 \n 1.0 \n home \n 1 \n 21 \n 11 \n 2020-12-05 17:30:00+00:00 \n 2020-12-05 \n 14 \n 14 \n 0.776807 \n 0.169688 \n 0.053505 \n 2 \n 1686.436646 \n 1252.658447 \n 319 \n 0.750000 \n 0.132812 \n 0.117188 \n 0.9375 \n 0.234375 \n 2.3125 \n 0.609375 \n 0.703125 \n 1.703125 \n 0.660547 \n 0.160972 \n 0.178481 \n 1664.798659 \n 1461.905214 \n 319.0 \n 0.75 \n 0.132812 \n 0.117188 \n 0.9375 \n 0.234375 \n 2.3125 \n 0.609375 \n 0.703125 \n 1.703125 \n 0.660547 \n 0.160972 \n 0.178481 \n 1664.798659 \n 1461.905214 \n 1653 \n 0.270833 \n 0.291667 \n 0.437500 \n 0.3125 \n 0.666667 \n 0.9375 \n 1.500000 \n -0.354167 \n 
-0.562500 \n 0.232241 \n 0.277189 \n 0.490570 \n 1296.581658 \n 1412.341579 \n 1653.0 \n 0.208333 \n 0.291667 \n 0.5 \n 0.416667 \n 0.666667 \n 1.0 \n 1.791667 \n -0.25 \n -0.791667 \n 0.122681 \n 0.26322 \n 0.614099 \n 1300.265222 \n 1419.646438 \n \n \n 135240 \n greece \n super-league \n 9197411 \n 11 \n 2020-12-05 17:30:00+00:00 \n volos nfc \n olympiacos \n 1653 \n 319 \n 0 \n 0 \n 1 \n 1.0 \n 4.0 \n home \n 0 \n 21 \n 11 \n 2020-12-05 17:30:00+00:00 \n 2020-12-05 \n 14 \n 14 \n 0.053505 \n 0.169688 \n 0.776807 \n 2 \n 1252.658447 \n 1686.436646 \n 1653 \n 0.270833 \n 0.291667 \n 0.437500 \n 0.3125 \n 0.666667 \n 0.9375 \n 1.500000 \n -0.354167 \n -0.562500 \n 0.232241 \n 0.277189 \n 0.490570 \n 1296.581658 \n 1412.341579 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n 319 \n 0.750000 \n 0.132812 \n 0.117188 \n 0.9375 \n 0.234375 \n 2.3125 \n 0.609375 \n 0.703125 \n 1.703125 \n 0.660547 \n 0.160972 \n 0.178481 \n 1664.798659 \n 1461.905214 \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n NaN \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 43
+ }
+ ],
+ "source": [
+ "df_[df_['mid']==9197411]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/dl.ipynb b/dl.ipynb
index b391583..050b38b 100644
--- a/dl.ipynb
+++ b/dl.ipynb
@@ -28,7 +28,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -51,6 +51,203 @@
"%autoreload 2"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1=pd.read_csv('data/op/matches_done.csv', index_col=None)\n",
+ "df1.drop_duplicates(subset='link').to_csv('data/op/matches_done1.csv', index=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(329958, 31) (8668, 33) (338626, 33)\n"
+ ]
+ }
+ ],
+ "source": [
+ "df3=pd.concat([df1, df2], axis=0)\n",
+ "print(df1.shape,df2.shape,df3.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df3.to_csv('data/sofa/statistics.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv('data/fbref/matches.csv')\n",
+ "df['id']=df.link.apply(lambda x: x.split('/')[3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "raw/fbref/matches/22a24ffa.htm 201030\n",
+ "raw/fbref/matches/67e63bcf.htm 209407\n",
+ "raw/fbref/matches/fd457ac5.htm 203377\n",
+ "raw/fbref/matches/20a3bbe2.htm 203534\n",
+ "raw/fbref/matches/e5b88fa8.htm 199517\n",
+ "raw/fbref/matches/e0516d1f.htm 212733\n",
+ "raw/fbref/matches/e6c44aaf.htm 206716\n",
+ "raw/fbref/matches/b9e082c3.htm 211194\n",
+ "raw/fbref/matches/3e2548f0.htm 209397\n",
+ "raw/fbref/matches/ffb3031c.htm 206483\n",
+ "raw/fbref/matches/ee52969d.htm 191758\n",
+ "raw/fbref/matches/f41fce84.htm 199907\n",
+ "raw/fbref/matches/e7837e18.htm 210982\n",
+ "raw/fbref/matches/a02fd3af.htm 207772\n",
+ "raw/fbref/matches/a3446c57.htm 203220\n",
+ "raw/fbref/matches/157b6509.htm 202152\n",
+ "raw/fbref/matches/a02fbb1d.htm 201590\n",
+ "raw/fbref/matches/5bb581a0.htm 201066\n",
+ "raw/fbref/matches/0a2b1965.htm 209134\n",
+ "raw/fbref/matches/f22ae0bc.htm 202024\n",
+ "raw/fbref/matches/62169af8.htm 212048\n",
+ "raw/fbref/matches/709a9f1c.htm 215233\n",
+ "raw/fbref/matches/00f2921c.htm 202406\n",
+ "raw/fbref/matches/41d2a28a.htm 210281\n",
+ "raw/fbref/matches/c421a4b4.htm 191917\n",
+ "raw/fbref/matches/4b61bf4c.htm 208978\n",
+ "raw/fbref/matches/17aa4a64.htm 199965\n",
+ "raw/fbref/matches/5a8c985a.htm 211806\n",
+ "raw/fbref/matches/c3294ed3.htm 207824\n",
+ "raw/fbref/matches/bc3aca30.htm 212726\n",
+ "raw/fbref/matches/9724e9af.htm 131884\n",
+ "raw/fbref/matches/7191bed5.htm 208046\n",
+ "raw/fbref/matches/03901566.htm 208210\n",
+ "raw/fbref/matches/faa59a57.htm 187710\n",
+ "raw/fbref/matches/a6698389.htm 210752\n",
+ "raw/fbref/matches/316f3a9a.htm 197985\n",
+ "raw/fbref/matches/3060bd26.htm 211347\n"
+ ]
+ }
+ ],
+ "source": [
+ "folder='raw/fbref/matches/'\n",
+ "for row in df.itertuples():\n",
+ " fn=folder+row.id+'.htm'\n",
+ " with open(fn, 'r', encoding='utf8') as f:\n",
+ " txt=f.read()\n",
+ " print(fn,len(txt))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfd=pd.read_csv('data/op/matches_done.csv')\n",
+ "#df=pd.read_csv('data/op/matches.csv')\n",
+ "dfd0=dfd[dfd.done==0]\n",
+ "dfd1=dfd[dfd.done==1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfd0.to_csv('data/op/matches.csv', index=False)\n",
+ "dfd1.to_csv('data/op/matches_done.csv', index=False)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ds country champ t1 \\\n",
+ "102151 2020-02-08 au A-League Brisbane \n",
+ "103389 2017-11-24 NaN WCQ — UEFA (W) Hungary \n",
+ "103407 2017-04-05 ve Primera División Zamora \n",
+ "103410 2015-10-30 bg A Group Slavia Sofia \n",
+ "103411 2019-08-10 eng FA Cup Bishop AL \n",
+ "122918 2015-10-20 eng League One Sheffield Utd \n",
+ "122925 2017-10-07 NaN Friendlies (M) Tanzania \n",
+ "122933 2016-11-06 nl Dutch Eredivisie AZ Alkmaar \n",
+ "122936 2018-11-17 it Serie A AS Roma \n",
+ "122937 2020-10-17 se Damallsvenskan Djurgården \n",
+ "122947 2016-10-02 ru Russian Premier League Krasnodar \n",
+ "122951 2016-07-05 NaN Europa League Chikhura \n",
+ "122955 2016-09-09 hr 1. HNL Inter Zaprešić \n",
+ "122958 2018-05-19 fr Division 1 Féminine Soyaux \n",
+ "122962 2015-01-18 es Segunda División Sporting Gijón \n",
+ "\n",
+ " t2 sc1 sc2 \\\n",
+ "102151 Adelaide 2.0 1.0 \n",
+ "103389 Ukraine 0.0 1.0 \n",
+ "103407 Atlé Venezuela 4.0 0.0 \n",
+ "103410 Botev Plovdiv 2.0 0.0 \n",
+ "103411 Thornaby 0.0 2.0 \n",
+ "122918 Fleetwood Town 3.0 0.0 \n",
+ "122925 Malawi 1.0 1.0 \n",
+ "122933 Ajax 2.0 2.0 \n",
+ "122936 ChievoVerona 7.0 1.0 \n",
+ "122937 Linköping 0.0 3.0 \n",
+ "122947 Rubin Kazan 1.0 0.0 \n",
+ "122951 Zimbru Chișinău 2.0 3.0 \n",
+ "122955 Hajduk Split 1.0 1.0 \n",
+ "122958 Bordeaux 1.0 0.0 \n",
+ "122962 Betis 1.0 2.0 \n",
+ "\n",
+ " link done \n",
+ "102151 /en/matches/08016b59/Brisbane-Roar-Adelaide-Un... 0 \n",
+ "103389 /en/matches/408f5dcd/Hungary-Ukraine-November-... 0 \n",
+ "103407 /en/matches/403d8b5a/Zamora-Atletico-Venezuela... 0 \n",
+ "103410 /en/matches/a6ac4472/Slavia-Sofia-Botev-Plovdi... 0 \n",
+ "103411 /en/matches/97eb5d25/Bishop-AL-Thornaby-August... 0 \n",
+ "122918 /en/matches/5bce5855/Sheffield-United-Fleetwoo... 0 \n",
+ "122925 /en/matches/cef8af19/Tanzania-Malawi-October-7... 0 \n",
+ "122933 /en/matches/5d6b3601/AZ-Alkmaar-Ajax-November-... 0 \n",
+ "122936 /en/matches/07da3f53/AS-Roma-ChievoVerona-Nove... 0 \n",
+ "122937 /en/matches/76950db8/Djurgarden-Linkoping-Octo... 0 \n",
+ "122947 /en/matches/14427644/Krasnodar-Rubin-Kazan-Oct... 0 \n",
+ "122951 /en/matches/e849cec7/Chikhura-Sachkhere-Zimbru... 0 \n",
+ "122955 /en/matches/b77964bc/Inter-Zapresic-Hajduk-Spl... 0 \n",
+ "122958 /en/matches/c4f419d8/Soyaux-Bordeaux-May-19-20... 0 \n",
+ "122962 /en/matches/351f08f5/Sporting-Gijon-Real-Betis... 0 "
+ ],
+ "text/html": "
\n\n
\n \n \n \n ds \n country \n champ \n t1 \n t2 \n sc1 \n sc2 \n link \n done \n \n \n \n \n 102151 \n 2020-02-08 \n au \n A-League \n Brisbane \n Adelaide \n 2.0 \n 1.0 \n /en/matches/08016b59/Brisbane-Roar-Adelaide-Un... \n 0 \n \n \n 103389 \n 2017-11-24 \n NaN \n WCQ — UEFA (W) \n Hungary \n Ukraine \n 0.0 \n 1.0 \n /en/matches/408f5dcd/Hungary-Ukraine-November-... \n 0 \n \n \n 103407 \n 2017-04-05 \n ve \n Primera División \n Zamora \n Atlé Venezuela \n 4.0 \n 0.0 \n /en/matches/403d8b5a/Zamora-Atletico-Venezuela... \n 0 \n \n \n 103410 \n 2015-10-30 \n bg \n A Group \n Slavia Sofia \n Botev Plovdiv \n 2.0 \n 0.0 \n /en/matches/a6ac4472/Slavia-Sofia-Botev-Plovdi... \n 0 \n \n \n 103411 \n 2019-08-10 \n eng \n FA Cup \n Bishop AL \n Thornaby \n 0.0 \n 2.0 \n /en/matches/97eb5d25/Bishop-AL-Thornaby-August... \n 0 \n \n \n 122918 \n 2015-10-20 \n eng \n League One \n Sheffield Utd \n Fleetwood Town \n 3.0 \n 0.0 \n /en/matches/5bce5855/Sheffield-United-Fleetwoo... \n 0 \n \n \n 122925 \n 2017-10-07 \n NaN \n Friendlies (M) \n Tanzania \n Malawi \n 1.0 \n 1.0 \n /en/matches/cef8af19/Tanzania-Malawi-October-7... \n 0 \n \n \n 122933 \n 2016-11-06 \n nl \n Dutch Eredivisie \n AZ Alkmaar \n Ajax \n 2.0 \n 2.0 \n /en/matches/5d6b3601/AZ-Alkmaar-Ajax-November-... \n 0 \n \n \n 122936 \n 2018-11-17 \n it \n Serie A \n AS Roma \n ChievoVerona \n 7.0 \n 1.0 \n /en/matches/07da3f53/AS-Roma-ChievoVerona-Nove... \n 0 \n \n \n 122937 \n 2020-10-17 \n se \n Damallsvenskan \n Djurgården \n Linköping \n 0.0 \n 3.0 \n /en/matches/76950db8/Djurgarden-Linkoping-Octo... \n 0 \n \n \n 122947 \n 2016-10-02 \n ru \n Russian Premier League \n Krasnodar \n Rubin Kazan \n 1.0 \n 0.0 \n /en/matches/14427644/Krasnodar-Rubin-Kazan-Oct... \n 0 \n \n \n 122951 \n 2016-07-05 \n NaN \n Europa League \n Chikhura \n Zimbru Chișinău \n 2.0 \n 3.0 \n /en/matches/e849cec7/Chikhura-Sachkhere-Zimbru... \n 0 \n \n \n 122955 \n 2016-09-09 \n hr \n 1. 
HNL \n Inter Zaprešić \n Hajduk Split \n 1.0 \n 1.0 \n /en/matches/b77964bc/Inter-Zapresic-Hajduk-Spl... \n 0 \n \n \n 122958 \n 2018-05-19 \n fr \n Division 1 Féminine \n Soyaux \n Bordeaux \n 1.0 \n 0.0 \n /en/matches/c4f419d8/Soyaux-Bordeaux-May-19-20... \n 0 \n \n \n 122962 \n 2015-01-18 \n es \n Segunda División \n Sporting Gijón \n Betis \n 1.0 \n 2.0 \n /en/matches/351f08f5/Sporting-Gijon-Real-Betis... \n 0 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 24
+ }
+ ],
+ "source": [
+ "dfd0"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -442,6 +639,13 @@
"cell_type": "markdown",
"metadata": {}
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -692,14 +896,108 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "https://fbref.com/en/matches/2020-12-01 raw/fbref/days/2020-12-01.htm\n",
+ "raw/fbref/days/2020-12-01.htm exists!\n"
+ ]
+ }
+ ],
"source": [
"from data_provider import DataProvider\n",
"\n",
"dp=DataProvider()\n",
- "dp.load_fbref_matches()"
+ "#dp.load_fbref_matches()\n",
+ "df=pd.read_csv('data/sofa/matches_done.csv')\n",
+ "ds=df.ts.max()[:10]\n",
+ "de='{:%Y-%m-%d}'.format(datetime.today()-timedelta(days=1))\n",
+ "dp.load_fbref_days(ds, de)"
+ ]
+ },
+ {
+ "source": [
+ "## Days"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv('data/sofa/matches_done.csv')\n",
+ "ds=df.ts.max()[:10]\n",
+ "de='{:%Y-%m-%d}'.format(datetime.today()-timedelta(days=1))\n",
+ "d = datetime.strptime(ds, '%Y-%m-%d')\n",
+ "de = datetime.strptime(de, '%Y-%m-%d')\n",
+ "dates=[]\n",
+ "while d<=de:\n",
+ " dates.append(d)\n",
+ " d+=timedelta(days=1)\n",
+ "\n",
+ "# https://fbref.com/en/matches/2021-01-04"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[datetime.datetime(2020, 12, 1, 0, 0),\n",
+ " datetime.datetime(2020, 12, 2, 0, 0),\n",
+ " datetime.datetime(2020, 12, 3, 0, 0),\n",
+ " datetime.datetime(2020, 12, 4, 0, 0),\n",
+ " datetime.datetime(2020, 12, 5, 0, 0),\n",
+ " datetime.datetime(2020, 12, 6, 0, 0),\n",
+ " datetime.datetime(2020, 12, 7, 0, 0),\n",
+ " datetime.datetime(2020, 12, 8, 0, 0),\n",
+ " datetime.datetime(2020, 12, 9, 0, 0),\n",
+ " datetime.datetime(2020, 12, 10, 0, 0),\n",
+ " datetime.datetime(2020, 12, 11, 0, 0),\n",
+ " datetime.datetime(2020, 12, 12, 0, 0),\n",
+ " datetime.datetime(2020, 12, 13, 0, 0),\n",
+ " datetime.datetime(2020, 12, 14, 0, 0),\n",
+ " datetime.datetime(2020, 12, 15, 0, 0),\n",
+ " datetime.datetime(2020, 12, 16, 0, 0),\n",
+ " datetime.datetime(2020, 12, 17, 0, 0),\n",
+ " datetime.datetime(2020, 12, 18, 0, 0),\n",
+ " datetime.datetime(2020, 12, 19, 0, 0),\n",
+ " datetime.datetime(2020, 12, 20, 0, 0),\n",
+ " datetime.datetime(2020, 12, 21, 0, 0),\n",
+ " datetime.datetime(2020, 12, 22, 0, 0),\n",
+ " datetime.datetime(2020, 12, 23, 0, 0),\n",
+ " datetime.datetime(2020, 12, 24, 0, 0),\n",
+ " datetime.datetime(2020, 12, 25, 0, 0),\n",
+ " datetime.datetime(2020, 12, 26, 0, 0),\n",
+ " datetime.datetime(2020, 12, 27, 0, 0),\n",
+ " datetime.datetime(2020, 12, 28, 0, 0),\n",
+ " datetime.datetime(2020, 12, 29, 0, 0),\n",
+ " datetime.datetime(2020, 12, 30, 0, 0),\n",
+ " datetime.datetime(2020, 12, 31, 0, 0),\n",
+ " datetime.datetime(2021, 1, 1, 0, 0),\n",
+ " datetime.datetime(2021, 1, 2, 0, 0),\n",
+ " datetime.datetime(2021, 1, 3, 0, 0),\n",
+ " datetime.datetime(2021, 1, 4, 0, 0)]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ],
+ "source": [
+ "for data in self.DATA:\n",
+ " self._load_data(data)"
]
},
{
@@ -913,13 +1211,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 45,
"metadata": {
- "tags": [
- "outputPrepend"
- ]
+ "tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "30\n"
+ ]
+ }
+ ],
"source": [
"from op_parser import OpParser\n",
"\n",
@@ -929,9 +1233,759 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "roup-g',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Gladiator',\n",
+ " 't2': 'Nocerina',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.46',\n",
+ " 'oddsdraw': '3.09',\n",
+ " 'odds2': '2.61',\n",
+ " 'bn': '2',\n",
+ " 'link': '/soccer/italy/serie-d-group-g/san-felice-gladiator-nocerina-dIdZRVFs/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-d-group-g',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Torres',\n",
+ " 't2': 'Vis Artena',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '2.30',\n",
+ " 'oddsdraw': '3.30',\n",
+ " 'odds2': '2.70',\n",
+ " 'bn': '3',\n",
+ " 'link': '/soccer/italy/serie-d-group-g/sassari-torres-vis-artena-QcljOmF6/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-d-group-h',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Puteolana',\n",
+ " 't2': 'Altamura',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '2.75',\n",
+ " 'oddsdraw': '3.30',\n",
+ " 'odds2': '2.25',\n",
+ " 'bn': '1',\n",
+ " 'link': '/soccer/italy/serie-d-group-h/puteolana-internapoli-altamura-WIunKoCQ/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-d-group-h',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Portici 1906',\n",
+ " 't2': 'Gravina',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '2.28',\n",
+ " 'oddsdraw': '3.40',\n",
+ " 'odds2': '2.73',\n",
+ " 'bn': '9',\n",
+ " 'link': '/soccer/italy/serie-d-group-h/portici-1906-gravina-MoTlHIen/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-d-group-i',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'ACR Messina',\n",
+ " 't2': 'Gelbison Cilento',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '1.70',\n",
+ " 'oddsdraw': '3.54',\n",
+ " 'odds2': '4.26',\n",
+ " 'bn': '8',\n",
+ " 'link': '/soccer/italy/serie-d-group-i/messina-gelbison-cilento-zBxzPS98/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'liberia',\n",
+ " 'liga': 'lfa-first-division',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'LPRC Oiler',\n",
+ " 't2': 'Nimba Kwado',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.14',\n",
+ " 'oddsdraw': '3.07',\n",
+ " 'odds2': '3.27',\n",
+ " 'bn': '3',\n",
+ " 'link': '/soccer/liberia/lfa-first-division/lprc-oiler-nimba-kwado-YV1vxYeP/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'montenegro',\n",
+ " 'liga': 'prva-crnogorska-liga',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'OFK Petrovac',\n",
+ " 't2': 'Zeta',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.70',\n",
+ " 'oddsdraw': '2.79',\n",
+ " 'odds2': '2.79',\n",
+ " 'bn': '12',\n",
+ " 'link': '/soccer/montenegro/prva-crnogorska-liga/ofk-petrovac-zeta-n5zT4jCa/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 30),\n",
+ " 'country': 'russia',\n",
+ " 'liga': 'premier-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Dynamo Moscow',\n",
+ " 't2': 'Arsenal Tula',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.61',\n",
+ " 'oddsdraw': '3.83',\n",
+ " 'odds2': '5.93',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/russia/premier-league/dynamo-moscow-arsenal-tula-tpggow7a/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 13, 45),\n",
+ " 'country': 'france',\n",
+ " 'liga': 'division-1-women',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Paris SG W',\n",
+ " 't2': 'Paris FC W',\n",
+ " 'sc1': '4',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '1.06',\n",
+ " 'oddsdraw': '9.61',\n",
+ " 'odds2': '24.12',\n",
+ " 'bn': '10',\n",
+ " 'link': '/soccer/france/division-1-women/paris-sg-paris-fc-lAWkrjQ8/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'bosnia-and-herzegovina',\n",
+ " 'liga': 'premier-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Zeljeznicar',\n",
+ " 't2': 'Siroki Brijeg',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '1.64',\n",
+ " 'oddsdraw': '3.59',\n",
+ " 'odds2': '4.91',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/bosnia-and-herzegovina/premier-league/zeljeznicar-siroki-brijeg-IsAaowNo/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'burundi',\n",
+ " 'liga': 'primus-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Atletico Olympic',\n",
+ " 't2': 'Aigle Noir',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '3.14',\n",
+ " 'oddsdraw': '3.39',\n",
+ " 'odds2': '2.01',\n",
+ " 'bn': '5',\n",
+ " 'link': '/soccer/burundi/primus-league/atletico-olympic-aigle-noir-zyUiIoon/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'denmark',\n",
+ " 'liga': '1st-division',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Vendsyssel',\n",
+ " 't2': 'Silkeborg',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '6.05',\n",
+ " 'oddsdraw': '4.60',\n",
+ " 'odds2': '1.46',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/denmark/1st-division/vendsyssel-ff-silkeborg-0GzHU7G2/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'england',\n",
+ " 'liga': 'women-s-championship',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Leicester W',\n",
+ " 't2': 'London Bees W',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.21',\n",
+ " 'oddsdraw': '6.18',\n",
+ " 'odds2': '9.17',\n",
+ " 'bn': '8',\n",
+ " 'link': '/soccer/england/women-s-championship/leicester-london-bees-j7j1lsws/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'england',\n",
+ " 'liga': 'women-s-championship',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Lewes W',\n",
+ " 't2': 'Charlton W',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '1.78',\n",
+ " 'oddsdraw': '3.67',\n",
+ " 'odds2': '3.72',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/england/women-s-championship/lewes-charlton-Uck5m1hm/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'england',\n",
+ " 'liga': 'women-s-championship',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Liverpool W',\n",
+ " 't2': 'Crystal Palace W',\n",
+ " 'sc1': '4',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.30',\n",
+ " 'oddsdraw': '5.01',\n",
+ " 'odds2': '7.65',\n",
+ " 'bn': '8',\n",
+ " 'link': '/soccer/england/women-s-championship/liverpool-crystal-palace-nZqAnL7g/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'england',\n",
+ " 'liga': 'women-s-championship',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'London City Lionesses W',\n",
+ " 't2': 'Coventry United W',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.43',\n",
+ " 'oddsdraw': '4.51',\n",
+ " 'odds2': '5.34',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/england/women-s-championship/london-city-lionesses-coventry-united-QPrEouNa/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'germany',\n",
+ " 'liga': 'dfb-pokal-women',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Koln W',\n",
+ " 't2': 'Hoffenheim W',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '6',\n",
+ " 'odds1': '12.41',\n",
+ " 'oddsdraw': '6.96',\n",
+ " 'odds2': '1.16',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/germany/dfb-pokal-women/koln-hoffenheim-KpiOBwl4/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'germany',\n",
+ " 'liga': 'regionalliga-west',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Dusseldorf II',\n",
+ " 't2': 'Schalke II',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '1.76',\n",
+ " 'oddsdraw': '3.64',\n",
+ " 'odds2': '4.22',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/germany/regionalliga-west/dusseldorf-schalke-C4ed4aqc/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'AlbinoLeffe',\n",
+ " 't2': 'Olbia',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.96',\n",
+ " 'oddsdraw': '3.03',\n",
+ " 'odds2': '4.02',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/albinoleffe-olbia-h2R3cKu4/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Carrarese',\n",
+ " 't2': 'Pergolettese',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '1.51',\n",
+ " 'oddsdraw': '3.84',\n",
+ " 'odds2': '5.83',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/carrarese-pergolettese-WhQ7dveA/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Grosseto',\n",
+ " 't2': 'Novara',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.59',\n",
+ " 'oddsdraw': '2.91',\n",
+ " 'odds2': '2.76',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/grosseto-novara-0SUBebAG/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Juventus U23',\n",
+ " 't2': 'Pro Patria',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.44',\n",
+ " 'oddsdraw': '2.80',\n",
+ " 'odds2': '3.08',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/juventus-pro-patria-vJTFfIPM/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Lecco',\n",
+ " 't2': 'Pistoiese',\n",
+ " 'sc1': '4',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '1.87',\n",
+ " 'oddsdraw': '3.05',\n",
+ " 'odds2': '4.44',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/lecco-pistoiese-MsJKgxuT/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Lucchese',\n",
+ " 't2': 'Alessandria',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '4.90',\n",
+ " 'oddsdraw': '3.38',\n",
+ " 'odds2': '1.69',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/lucchese-alessandria-dzAnkdmp/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Piacenza',\n",
+ " 't2': 'Pro Sesto',\n",
+ " 'sc1': '6',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '2.57',\n",
+ " 'oddsdraw': '2.96',\n",
+ " 'odds2': '2.75',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/piacenza-pro-sesto-jc9jlG2j/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-a',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Pontedera',\n",
+ " 't2': 'Como',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '2.55',\n",
+ " 'oddsdraw': '2.87',\n",
+ " 'odds2': '2.86',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-a/us-pontedera-como-Um8fmzId/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-b',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Arezzo',\n",
+ " 't2': 'Sudtirol',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '4',\n",
+ " 'odds1': '4.11',\n",
+ " 'oddsdraw': '3.06',\n",
+ " 'odds2': '1.93',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-b/arezzo-sudtirol-EuYGQ1Fl/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-b',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Ravenna',\n",
+ " 't2': 'Padova',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '5.13',\n",
+ " 'oddsdraw': '3.49',\n",
+ " 'odds2': '1.65',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-b/ravenna-padova-ARHJpN6K/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-b',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Triestina',\n",
+ " 't2': 'Sambenedettese',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.42',\n",
+ " 'oddsdraw': '2.89',\n",
+ " 'odds2': '3.02',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-b/triestina-sambenedettese-hj7OqsMQ/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-c',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Catania',\n",
+ " 't2': 'Cavese',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '1.54',\n",
+ " 'oddsdraw': '3.58',\n",
+ " 'odds2': '6.01',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-c/catania-cavese-bkwtAyog/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-c',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Potenza',\n",
+ " 't2': 'Viterbese',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '2.48',\n",
+ " 'oddsdraw': '2.98',\n",
+ " 'odds2': '2.86',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-c/potenza-viterbese-lvWh7ZUB/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-c',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Teramo',\n",
+ " 't2': 'Vibonese',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '1.71',\n",
+ " 'oddsdraw': '3.28',\n",
+ " 'odds2': '4.96',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-c/teramo-vibonese-2szc6gpI/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'italy',\n",
+ " 'liga': 'serie-c-group-c',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Ternana',\n",
+ " 't2': 'Bisceglie',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.23',\n",
+ " 'oddsdraw': '5.42',\n",
+ " 'odds2': '11.15',\n",
+ " 'bn': '13',\n",
+ " 'link': '/soccer/italy/serie-c-group-c/ternana-bisceglie-KEhOOEPd/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'northern-ireland',\n",
+ " 'liga': 'premiership-women',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Linfield W',\n",
+ " 't2': 'Sion Swifts W',\n",
+ " 'sc1': '4',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '2.76',\n",
+ " 'oddsdraw': '4.37',\n",
+ " 'odds2': '1.96',\n",
+ " 'bn': '6',\n",
+ " 'link': '/soccer/northern-ireland/premiership-women/linfield-sion-swifts-zqU5d0SP/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'norway',\n",
+ " 'liga': 'obos-ligaen',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Ham-Kam',\n",
+ " 't2': 'Lillestrom',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '3.22',\n",
+ " 'oddsdraw': '3.36',\n",
+ " 'odds2': '2.16',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/norway/obos-ligaen/ham-kam-lillestrom-Cd7g7nZL/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 0),\n",
+ " 'country': 'tunisia',\n",
+ " 'liga': 'ligue-professionnelle-1',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Esperance Tunis',\n",
+ " 't2': 'Slimane',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '1.25',\n",
+ " 'oddsdraw': '4.90',\n",
+ " 'odds2': '11.63',\n",
+ " 'bn': '11',\n",
+ " 'link': '/soccer/tunisia/ligue-professionnelle-1/esperance-tunis-slimane-bHJYOgC0/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 15),\n",
+ " 'country': 'malta',\n",
+ " 'liga': 'division-1',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Fgura',\n",
+ " 't2': 'Naxxar Lions',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '2.91',\n",
+ " 'oddsdraw': '3.56',\n",
+ " 'odds2': '2.07',\n",
+ " 'bn': '6',\n",
+ " 'link': '/soccer/malta/division-1/fgura-naxxar-lions-hAy2kP5m/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 25),\n",
+ " 'country': 'saudi-arabia',\n",
+ " 'liga': 'saudi-professional-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Al Qadisiya',\n",
+ " 't2': 'Al-Shabab',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '4.08',\n",
+ " 'oddsdraw': '3.60',\n",
+ " 'odds2': '1.78',\n",
+ " 'bn': '12',\n",
+ " 'link': '/soccer/saudi-arabia/saudi-professional-league/al-qadisiya-al-shabab-AN5CrCF0/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 30),\n",
+ " 'country': 'montenegro',\n",
+ " 'liga': 'prva-crnogorska-liga',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Titograd',\n",
+ " 't2': 'Decic',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '3.20',\n",
+ " 'oddsdraw': '2.62',\n",
+ " 'odds2': '2.52',\n",
+ " 'bn': '12',\n",
+ " 'link': '/soccer/montenegro/prva-crnogorska-liga/ofk-titograd-decic-4tVX3AR5/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 30),\n",
+ " 'country': 'morocco',\n",
+ " 'liga': 'botola-pro',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Hassania Agadir',\n",
+ " 't2': 'IR Tanger',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.38',\n",
+ " 'oddsdraw': '2.70',\n",
+ " 'odds2': '3.38',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/morocco/botola-pro/hassania-agadir-ir-tanger-ALK1BNQf/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 30),\n",
+ " 'country': 'qatar',\n",
+ " 'liga': 'division-2',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Al-Shahaniya',\n",
+ " 't2': 'Shamal',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.24',\n",
+ " 'oddsdraw': '3.31',\n",
+ " 'odds2': '2.77',\n",
+ " 'bn': '6',\n",
+ " 'link': '/soccer/qatar/division-2/al-shahaniya-shamal-QNgUYPgd/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 14, 30),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'segunda-division-b-group-1',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Valladolid Promesas',\n",
+ " 't2': 'Lealtad',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.38',\n",
+ " 'oddsdraw': '2.87',\n",
+ " 'odds2': '3.12',\n",
+ " 'bn': '12',\n",
+ " 'link': '/soccer/spain/segunda-division-b-group-1/valladolid-promesas-lealtad-I5YR2nne/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'belgium',\n",
+ " 'liga': 'proximus-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Westerlo',\n",
+ " 't2': 'Lommel SK',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.32',\n",
+ " 'oddsdraw': '3.30',\n",
+ " 'odds2': '2.92',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/belgium/proximus-league/westerlo-lommel-sk-GhIXq0rM/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'denmark',\n",
+ " 'liga': 'superliga',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'FC Copenhagen',\n",
+ " 't2': 'Horsens',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.55',\n",
+ " 'oddsdraw': '4.20',\n",
+ " 'odds2': '5.80',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/denmark/superliga/fc-copenhagen-horsens-hOz6MGm9/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-4',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'San Ignacio',\n",
+ " 't2': 'Pasaia',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '2.05',\n",
+ " 'oddsdraw': '2.79',\n",
+ " 'odds2': '3.90',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/spain/tercera-division-group-4/san-ignacio-pasaia-K8P0Jc10/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-6',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Elche CF Ilicitano B',\n",
+ " 't2': 'Novelda CF',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.47',\n",
+ " 'oddsdraw': '3.79',\n",
+ " 'odds2': '6.50',\n",
+ " 'bn': '8',\n",
+ " 'link': '/soccer/spain/tercera-division-group-6/elche-cf-ilicitano-novelda-cf-SYvE9a1f/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-7',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'RSD Alcala',\n",
+ " 't2': 'Complutense',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.63',\n",
+ " 'oddsdraw': '3.37',\n",
+ " 'odds2': '5.32',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/spain/tercera-division-group-7/rsd-alcala-ad-alcala-UcVXKvuD/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'switzerland',\n",
+ " 'liga': 'super-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Luzern',\n",
+ " 't2': 'Young Boys',\n",
+ " 'sc1': '2',\n",
+ " 'sc2': '3',\n",
+ " 'odds1': '4.51',\n",
+ " 'oddsdraw': '3.84',\n",
+ " 'odds2': '1.73',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/switzerland/super-league/luzern-young-boys-lWHStzg3/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 0),\n",
+ " 'country': 'switzerland',\n",
+ " 'liga': 'super-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Zurich',\n",
+ " 't2': 'Lausanne',\n",
+ " 'sc1': '4',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '2.52',\n",
+ " 'oddsdraw': '3.52',\n",
+ " 'odds2': '2.64',\n",
+ " 'bn': '14',\n",
+ " 'link': '/soccer/switzerland/super-league/zurich-lausanne-I5MWuf89/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 15),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'primera-division-women',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Real Sociedad W',\n",
+ " 't2': 'Espanyol W',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.33',\n",
+ " 'oddsdraw': '4.74',\n",
+ " 'odds2': '7.85',\n",
+ " 'bn': '10',\n",
+ " 'link': '/soccer/spain/primera-division-women/real-sociedad-espanyol-h0nGVh60/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 30),\n",
+ " 'country': 'burkina-faso',\n",
+ " 'liga': 'premier-league',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'ASF Dioulasso',\n",
+ " 't2': 'Vitesse',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.68',\n",
+ " 'oddsdraw': '2.65',\n",
+ " 'odds2': '2.84',\n",
+ " 'bn': '3',\n",
+ " 'link': '/soccer/burkina-faso/premier-league/asf-dioulasso-vitesse-468EujT1/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 30),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'segunda-division-b-group-2',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Calahorra',\n",
+ " 't2': 'Ejea',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.76',\n",
+ " 'oddsdraw': '3.28',\n",
+ " 'odds2': '4.43',\n",
+ " 'bn': '12',\n",
+ " 'link': '/soccer/spain/segunda-division-b-group-2/cd-calahorra-sd-ejea-jwXixHDf/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 30),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-13',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'CAP Ciudad de Murcia',\n",
+ " 't2': 'Muleno',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '2',\n",
+ " 'odds1': '2.54',\n",
+ " 'oddsdraw': '3.17',\n",
+ " 'odds2': '2.52',\n",
+ " 'bn': '8',\n",
+ " 'link': '/soccer/spain/tercera-division-group-13/ciudad-de-murcia-muleno-cf-x2cb3f2a/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 30),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-18',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Madridejos',\n",
+ " 't2': 'Villacanas',\n",
+ " 'sc1': '0',\n",
+ " 'sc2': '1',\n",
+ " 'odds1': '2.63',\n",
+ " 'oddsdraw': '3.18',\n",
+ " 'odds2': '2.49',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/spain/tercera-division-group-18/madridejos-villacanas-Wjum1fI1/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 30),\n",
+ " 'country': 'spain',\n",
+ " 'liga': 'tercera-division-group-8',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Atl. Astorga',\n",
+ " 't2': 'La Baneza',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.21',\n",
+ " 'oddsdraw': '5.80',\n",
+ " 'odds2': '9.79',\n",
+ " 'bn': '7',\n",
+ " 'link': '/soccer/spain/tercera-division-group-8/atletico-astorga-la-baneza-td7nBMN1/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 45),\n",
+ " 'country': 'united-arab-emirates',\n",
+ " 'liga': 'presidents-cup',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Shabab Al-Ahli Dubai',\n",
+ " 't2': 'Hatta',\n",
+ " 'sc1': '3',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '1.17',\n",
+ " 'oddsdraw': '6.69',\n",
+ " 'odds2': '10.93',\n",
+ " 'bn': '11',\n",
+ " 'link': '/soccer/united-arab-emirates/presidents-cup/shabab-al-ahli-dubai-hatta-h0vpsjsE/'},\n",
+ " {'ds': datetime.datetime(2020, 12, 6, 15, 45),\n",
+ " 'country': 'united-arab-emirates',\n",
+ " 'liga': 'presidents-cup',\n",
+ " 'season': '2020/2021',\n",
+ " 't1': 'Al Dhafra',\n",
+ " 't2': 'Al Jazira',\n",
+ " 'sc1': '1',\n",
+ " 'sc2': '0',\n",
+ " 'odds1': '5.96',\n",
+ " 'oddsdraw': '4.56',\n",
+ " 'odds2': '1.41',\n",
+ " 'bn': '11',\n",
+ " 'link': '/soccer/united-arab-emirates/presidents-cup/al-dhafra-al-jazira-ba6HOsRn/'},\n",
+ " ...]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 46
+ }
+ ],
"source": [
"op.DATA"
]
@@ -1182,227 +2236,67 @@
" bookies[x]['time_close']=max([change_time[x]['0'],change_time[x]['1'],change_time[x]['2']])\n"
]
},
+ {
+ "source": [
+ "## ELO\n"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
{
"cell_type": "code",
- "execution_count": 122,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
- " mid bid w1 wx w2 move_1 move_x move_2 open_1 open_x \\\n",
- "0 EFEkIb54 417 2.36 3.14 3.34 D D U 3.18 3.20 \n",
- "1 EFEkIb54 453 2.36 3.14 3.34 D D U 3.18 3.20 \n",
- "2 EFEkIb54 9 2.44 3.20 3.37 D D U 3.27 3.30 \n",
- "3 EFEkIb54 32 2.49 3.17 3.30 D D U 3.27 3.30 \n",
- "4 EFEkIb54 141 2.52 3.05 3.00 D U U 3.00 3.00 \n",
- "5 EFEkIb54 160 2.52 3.05 2.95 D D U 3.10 3.15 \n",
- "6 EFEkIb54 73 2.46 3.20 3.32 D D U 3.29 3.34 \n",
- "7 EFEkIb54 455 2.46 3.20 3.32 D D U 3.29 3.34 \n",
- "8 EFEkIb54 454 2.49 3.07 3.10 D D U 3.17 3.20 \n",
- "9 EFEkIb54 429 2.55 3.10 3.00 D U U 3.00 3.00 \n",
- "10 EFEkIb54 149 2.40 3.05 3.20 D D U 3.15 3.15 \n",
- "11 EFEkIb54 443 2.55 3.05 3.00 D D U 3.15 3.15 \n",
- "12 EFEkIb54 1 2.55 3.05 3.00 D D U 3.15 3.15 \n",
- "13 EFEkIb54 21 2.40 3.20 3.25 D U U 3.10 3.10 \n",
- "14 EFEkIb54 446 2.44 3.11 3.23 D D U 3.20 3.22 \n",
- "15 EFEkIb54 76 2.38 3.10 3.13 D D U 3.13 3.20 \n",
- "16 EFEkIb54 468 2.47 3.24 3.48 D U U 3.25 3.20 \n",
- "17 EFEkIb54 46 2.58 3.13 3.05 D D U 3.05 3.15 \n",
- "18 EFEkIb54 163 2.58 3.13 3.05 D D U 3.05 3.15 \n",
- "19 EFEkIb54 44 2.52 3.20 3.35 U U U 2.10 2.20 \n",
- "20 EFEkIb54 139 2.55 3.00 2.92 D U U 2.60 2.90 \n",
- "21 EFEkIb54 419 2.40 3.20 3.40 D N U 2.75 3.20 \n",
- "22 EFEkIb54 16 2.40 3.20 3.40 D N U 2.75 3.20 \n",
- "23 EFEkIb54 383 2.40 3.05 3.15 D D U 2.70 3.10 \n",
- "24 EFEkIb54 464 2.40 3.05 3.15 D D U 2.70 3.10 \n",
- "25 EFEkIb54 414 2.45 3.15 3.25 D U U 2.70 3.10 \n",
- "26 EFEkIb54 14 2.35 3.00 3.05 D D U 2.65 3.05 \n",
- "27 EFEkIb54 43 2.40 3.15 3.30 D D U 2.95 3.20 \n",
- "28 EFEkIb54 472 2.40 3.10 3.20 D D U 2.90 3.20 \n",
- "29 EFEkIb54 3 2.36 2.99 3.10 D D U 2.81 3.11 \n",
- "30 EFEkIb54 30 2.40 3.00 3.20 D D U 2.80 3.10 \n",
- "31 EFEkIb54 57 2.40 3.10 3.20 D U U 2.50 2.80 \n",
- "32 EFEkIb54 381 2.40 3.20 3.38 D D U 2.96 3.25 \n",
- "33 EFEkIb54 531 2.40 3.20 3.38 D D U 2.96 3.25 \n",
- "34 EFEkIb54 157 2.40 3.20 3.40 D U U 2.80 3.10 \n",
- "35 EFEkIb54 27 2.40 3.20 3.40 D U U 2.80 3.10 \n",
- "36 EFEkIb54 5 2.55 3.20 3.15 D U U 2.80 3.10 \n",
- "37 EFEkIb54 26 2.38 3.10 3.00 D N U 2.88 3.10 \n",
- "38 EFEkIb54 33 2.38 3.10 3.25 D N U 2.80 3.10 \n",
- "39 EFEkIb54 24 2.35 3.10 3.25 D N U 2.80 3.10 \n",
- "40 EFEkIb54 56 2.38 3.15 3.25 D D U 2.83 3.25 \n",
- "41 EFEkIb54 476 2.38 3.14 3.30 D D U 2.83 3.25 \n",
- "42 EFEkIb54 372 2.35 3.10 3.20 D N U 2.88 3.10 \n",
- "43 EFEkIb54 15 2.35 3.10 3.20 D N U 2.88 3.10 \n",
- "44 EFEkIb54 31 2.35 2.95 3.10 D D U 2.70 3.05 \n",
- "45 EFEkIb54 45 2.46 3.22 3.25 D U U 2.80 2.90 \n",
- "46 EFEkIb54 49 2.46 3.22 3.25 D U U 2.80 2.90 \n",
- "47 EFEkIb54 411 2.46 3.22 3.25 D U U 2.80 2.90 \n",
- "48 EFEkIb54 392 2.40 3.10 3.20 D D U 2.60 3.20 \n",
- "49 EFEkIb54 128 2.22 2.85 2.95 D D U 2.55 3.10 \n",
- "50 EFEkIb54 129 2.30 2.95 3.10 D D U 2.50 3.00 \n",
- "51 EFEkIb54 2 2.55 3.10 3.00 D D U 2.60 3.20 \n",
- "52 EFEkIb54 75 2.40 3.10 2.94 D U U 2.59 3.00 \n",
- "53 EFEkIb54 147 2.42 3.00 3.00 D N U 2.53 3.00 \n",
- "54 EFEkIb54 18 2.41 3.19 3.37 D D U 2.67 3.35 \n",
- "55 EFEkIb54 390 2.44 3.22 3.50 D D U 2.62 3.26 \n",
- "56 EFEkIb54 164 2.58 3.05 3.05 U D U 2.52 3.20 \n",
- "\n",
- " open_2 time_open time_close \n",
- "0 2.36 1552887983 1553629298 \n",
- "1 2.36 1552888069 1553629352 \n",
- "2 2.43 1552983711 1553629497 \n",
- "3 2.43 1552983720 1553629038 \n",
- "4 2.25 1552983754 1553596748 \n",
- "5 2.33 1552984330 1553585881 \n",
- "6 2.40 1552984587 1553629318 \n",
- "7 2.40 1552984594 1553629322 \n",
- "8 2.37 1552984740 1553629065 \n",
- "9 2.20 1552985819 1553622736 \n",
- "10 2.35 1552986787 1553629429 \n",
- "11 2.35 1552986870 1553626610 \n",
- "12 2.35 1552986870 1553626610 \n",
- "13 2.30 1552987677 1553629310 \n",
- "14 2.39 1552988529 1553629171 \n",
- "15 2.38 1552989803 1553629497 \n",
- "16 2.35 1552990344 1553629182 \n",
- "17 2.30 1553012411 1553628437 \n",
- "18 2.30 1553012526 1553628529 \n",
- "19 1.75 1553014703 1553629155 \n",
- "20 2.30 1553071250 1553624495 \n",
- "21 2.60 1553082138 1553629164 \n",
- "22 2.60 1553082185 1553629240 \n",
- "23 2.50 1553092129 1553629081 \n",
- "24 2.50 1553092223 1553629249 \n",
- "25 2.50 1553092226 1553629092 \n",
- "26 2.45 1553092231 1553628926 \n",
- "27 2.60 1553095157 1553629051 \n",
- "28 2.55 1553100539 1553629352 \n",
- "29 2.48 1553104632 1553628932 \n",
- "30 2.63 1553114899 1553628977 \n",
- "31 2.60 1553115166 1553629187 \n",
- "32 2.63 1553152122 1553629188 \n",
- "33 2.63 1553152215 1553629200 \n",
- "34 2.70 1553164682 1553629458 \n",
- "35 2.70 1553164736 1553629334 \n",
- "36 2.70 1553164969 1553628749 \n",
- "37 2.50 1553170241 1553628771 \n",
- "38 2.65 1553197618 1553629426 \n",
- "39 2.65 1553197633 1553629444 \n",
- "40 2.57 1553270849 1553628819 \n",
- "41 2.57 1553271084 1553629502 \n",
- "42 2.55 1553274548 1553629443 \n",
- "43 2.55 1553274694 1553629262 \n",
- "44 2.50 1553276758 1553629022 \n",
- "45 2.60 1553381633 1553629401 \n",
- "46 2.60 1553381735 1553629413 \n",
- "47 2.60 1553381850 1553629428 \n",
- "48 2.80 1553414694 1553629342 \n",
- "49 2.75 1553414761 1553629158 \n",
- "50 2.70 1553414890 1553629349 \n",
- "51 2.80 1553414933 1553549223 \n",
- "52 2.76 1553419600 1553629445 \n",
- "53 2.84 1553419703 1553629333 \n",
- "54 2.80 1553427181 1553629466 \n",
- "55 3.00 1553467860 1553629485 \n",
- "56 3.00 1553555049 1553628534 "
- ],
- "text/html": "
\n\n
\n \n \n \n mid \n bid \n w1 \n wx \n w2 \n move_1 \n move_x \n move_2 \n open_1 \n open_x \n open_2 \n time_open \n time_close \n \n \n \n \n 0 \n EFEkIb54 \n 417 \n 2.36 \n 3.14 \n 3.34 \n D \n D \n U \n 3.18 \n 3.20 \n 2.36 \n 1552887983 \n 1553629298 \n \n \n 1 \n EFEkIb54 \n 453 \n 2.36 \n 3.14 \n 3.34 \n D \n D \n U \n 3.18 \n 3.20 \n 2.36 \n 1552888069 \n 1553629352 \n \n \n 2 \n EFEkIb54 \n 9 \n 2.44 \n 3.20 \n 3.37 \n D \n D \n U \n 3.27 \n 3.30 \n 2.43 \n 1552983711 \n 1553629497 \n \n \n 3 \n EFEkIb54 \n 32 \n 2.49 \n 3.17 \n 3.30 \n D \n D \n U \n 3.27 \n 3.30 \n 2.43 \n 1552983720 \n 1553629038 \n \n \n 4 \n EFEkIb54 \n 141 \n 2.52 \n 3.05 \n 3.00 \n D \n U \n U \n 3.00 \n 3.00 \n 2.25 \n 1552983754 \n 1553596748 \n \n \n 5 \n EFEkIb54 \n 160 \n 2.52 \n 3.05 \n 2.95 \n D \n D \n U \n 3.10 \n 3.15 \n 2.33 \n 1552984330 \n 1553585881 \n \n \n 6 \n EFEkIb54 \n 73 \n 2.46 \n 3.20 \n 3.32 \n D \n D \n U \n 3.29 \n 3.34 \n 2.40 \n 1552984587 \n 1553629318 \n \n \n 7 \n EFEkIb54 \n 455 \n 2.46 \n 3.20 \n 3.32 \n D \n D \n U \n 3.29 \n 3.34 \n 2.40 \n 1552984594 \n 1553629322 \n \n \n 8 \n EFEkIb54 \n 454 \n 2.49 \n 3.07 \n 3.10 \n D \n D \n U \n 3.17 \n 3.20 \n 2.37 \n 1552984740 \n 1553629065 \n \n \n 9 \n EFEkIb54 \n 429 \n 2.55 \n 3.10 \n 3.00 \n D \n U \n U \n 3.00 \n 3.00 \n 2.20 \n 1552985819 \n 1553622736 \n \n \n 10 \n EFEkIb54 \n 149 \n 2.40 \n 3.05 \n 3.20 \n D \n D \n U \n 3.15 \n 3.15 \n 2.35 \n 1552986787 \n 1553629429 \n \n \n 11 \n EFEkIb54 \n 443 \n 2.55 \n 3.05 \n 3.00 \n D \n D \n U \n 3.15 \n 3.15 \n 2.35 \n 1552986870 \n 1553626610 \n \n \n 12 \n EFEkIb54 \n 1 \n 2.55 \n 3.05 \n 3.00 \n D \n D \n U \n 3.15 \n 3.15 \n 2.35 \n 1552986870 \n 1553626610 \n \n \n 13 \n EFEkIb54 \n 21 \n 2.40 \n 3.20 \n 3.25 \n D \n U \n U \n 3.10 \n 3.10 \n 2.30 \n 1552987677 \n 1553629310 \n \n \n 14 \n EFEkIb54 \n 446 \n 2.44 \n 3.11 \n 3.23 \n D \n D \n U \n 3.20 \n 3.22 \n 2.39 \n 1552988529 \n 1553629171 \n \n \n 15 \n EFEkIb54 \n 76 \n 2.38 \n 3.10 \n 
3.13 \n D \n D \n U \n 3.13 \n 3.20 \n 2.38 \n 1552989803 \n 1553629497 \n \n \n 16 \n EFEkIb54 \n 468 \n 2.47 \n 3.24 \n 3.48 \n D \n U \n U \n 3.25 \n 3.20 \n 2.35 \n 1552990344 \n 1553629182 \n \n \n 17 \n EFEkIb54 \n 46 \n 2.58 \n 3.13 \n 3.05 \n D \n D \n U \n 3.05 \n 3.15 \n 2.30 \n 1553012411 \n 1553628437 \n \n \n 18 \n EFEkIb54 \n 163 \n 2.58 \n 3.13 \n 3.05 \n D \n D \n U \n 3.05 \n 3.15 \n 2.30 \n 1553012526 \n 1553628529 \n \n \n 19 \n EFEkIb54 \n 44 \n 2.52 \n 3.20 \n 3.35 \n U \n U \n U \n 2.10 \n 2.20 \n 1.75 \n 1553014703 \n 1553629155 \n \n \n 20 \n EFEkIb54 \n 139 \n 2.55 \n 3.00 \n 2.92 \n D \n U \n U \n 2.60 \n 2.90 \n 2.30 \n 1553071250 \n 1553624495 \n \n \n 21 \n EFEkIb54 \n 419 \n 2.40 \n 3.20 \n 3.40 \n D \n N \n U \n 2.75 \n 3.20 \n 2.60 \n 1553082138 \n 1553629164 \n \n \n 22 \n EFEkIb54 \n 16 \n 2.40 \n 3.20 \n 3.40 \n D \n N \n U \n 2.75 \n 3.20 \n 2.60 \n 1553082185 \n 1553629240 \n \n \n 23 \n EFEkIb54 \n 383 \n 2.40 \n 3.05 \n 3.15 \n D \n D \n U \n 2.70 \n 3.10 \n 2.50 \n 1553092129 \n 1553629081 \n \n \n 24 \n EFEkIb54 \n 464 \n 2.40 \n 3.05 \n 3.15 \n D \n D \n U \n 2.70 \n 3.10 \n 2.50 \n 1553092223 \n 1553629249 \n \n \n 25 \n EFEkIb54 \n 414 \n 2.45 \n 3.15 \n 3.25 \n D \n U \n U \n 2.70 \n 3.10 \n 2.50 \n 1553092226 \n 1553629092 \n \n \n 26 \n EFEkIb54 \n 14 \n 2.35 \n 3.00 \n 3.05 \n D \n D \n U \n 2.65 \n 3.05 \n 2.45 \n 1553092231 \n 1553628926 \n \n \n 27 \n EFEkIb54 \n 43 \n 2.40 \n 3.15 \n 3.30 \n D \n D \n U \n 2.95 \n 3.20 \n 2.60 \n 1553095157 \n 1553629051 \n \n \n 28 \n EFEkIb54 \n 472 \n 2.40 \n 3.10 \n 3.20 \n D \n D \n U \n 2.90 \n 3.20 \n 2.55 \n 1553100539 \n 1553629352 \n \n \n 29 \n EFEkIb54 \n 3 \n 2.36 \n 2.99 \n 3.10 \n D \n D \n U \n 2.81 \n 3.11 \n 2.48 \n 1553104632 \n 1553628932 \n \n \n 30 \n EFEkIb54 \n 30 \n 2.40 \n 3.00 \n 3.20 \n D \n D \n U \n 2.80 \n 3.10 \n 2.63 \n 1553114899 \n 1553628977 \n \n \n 31 \n EFEkIb54 \n 57 \n 2.40 \n 3.10 \n 3.20 \n D \n U \n U \n 2.50 \n 2.80 \n 2.60 \n 
1553115166 \n 1553629187 \n \n \n 32 \n EFEkIb54 \n 381 \n 2.40 \n 3.20 \n 3.38 \n D \n D \n U \n 2.96 \n 3.25 \n 2.63 \n 1553152122 \n 1553629188 \n \n \n 33 \n EFEkIb54 \n 531 \n 2.40 \n 3.20 \n 3.38 \n D \n D \n U \n 2.96 \n 3.25 \n 2.63 \n 1553152215 \n 1553629200 \n \n \n 34 \n EFEkIb54 \n 157 \n 2.40 \n 3.20 \n 3.40 \n D \n U \n U \n 2.80 \n 3.10 \n 2.70 \n 1553164682 \n 1553629458 \n \n \n 35 \n EFEkIb54 \n 27 \n 2.40 \n 3.20 \n 3.40 \n D \n U \n U \n 2.80 \n 3.10 \n 2.70 \n 1553164736 \n 1553629334 \n \n \n 36 \n EFEkIb54 \n 5 \n 2.55 \n 3.20 \n 3.15 \n D \n U \n U \n 2.80 \n 3.10 \n 2.70 \n 1553164969 \n 1553628749 \n \n \n 37 \n EFEkIb54 \n 26 \n 2.38 \n 3.10 \n 3.00 \n D \n N \n U \n 2.88 \n 3.10 \n 2.50 \n 1553170241 \n 1553628771 \n \n \n 38 \n EFEkIb54 \n 33 \n 2.38 \n 3.10 \n 3.25 \n D \n N \n U \n 2.80 \n 3.10 \n 2.65 \n 1553197618 \n 1553629426 \n \n \n 39 \n EFEkIb54 \n 24 \n 2.35 \n 3.10 \n 3.25 \n D \n N \n U \n 2.80 \n 3.10 \n 2.65 \n 1553197633 \n 1553629444 \n \n \n 40 \n EFEkIb54 \n 56 \n 2.38 \n 3.15 \n 3.25 \n D \n D \n U \n 2.83 \n 3.25 \n 2.57 \n 1553270849 \n 1553628819 \n \n \n 41 \n EFEkIb54 \n 476 \n 2.38 \n 3.14 \n 3.30 \n D \n D \n U \n 2.83 \n 3.25 \n 2.57 \n 1553271084 \n 1553629502 \n \n \n 42 \n EFEkIb54 \n 372 \n 2.35 \n 3.10 \n 3.20 \n D \n N \n U \n 2.88 \n 3.10 \n 2.55 \n 1553274548 \n 1553629443 \n \n \n 43 \n EFEkIb54 \n 15 \n 2.35 \n 3.10 \n 3.20 \n D \n N \n U \n 2.88 \n 3.10 \n 2.55 \n 1553274694 \n 1553629262 \n \n \n 44 \n EFEkIb54 \n 31 \n 2.35 \n 2.95 \n 3.10 \n D \n D \n U \n 2.70 \n 3.05 \n 2.50 \n 1553276758 \n 1553629022 \n \n \n 45 \n EFEkIb54 \n 45 \n 2.46 \n 3.22 \n 3.25 \n D \n U \n U \n 2.80 \n 2.90 \n 2.60 \n 1553381633 \n 1553629401 \n \n \n 46 \n EFEkIb54 \n 49 \n 2.46 \n 3.22 \n 3.25 \n D \n U \n U \n 2.80 \n 2.90 \n 2.60 \n 1553381735 \n 1553629413 \n \n \n 47 \n EFEkIb54 \n 411 \n 2.46 \n 3.22 \n 3.25 \n D \n U \n U \n 2.80 \n 2.90 \n 2.60 \n 1553381850 \n 1553629428 \n \n \n 48 \n EFEkIb54 \n 392 \n 
2.40 \n 3.10 \n 3.20 \n D \n D \n U \n 2.60 \n 3.20 \n 2.80 \n 1553414694 \n 1553629342 \n \n \n 49 \n EFEkIb54 \n 128 \n 2.22 \n 2.85 \n 2.95 \n D \n D \n U \n 2.55 \n 3.10 \n 2.75 \n 1553414761 \n 1553629158 \n \n \n 50 \n EFEkIb54 \n 129 \n 2.30 \n 2.95 \n 3.10 \n D \n D \n U \n 2.50 \n 3.00 \n 2.70 \n 1553414890 \n 1553629349 \n \n \n 51 \n EFEkIb54 \n 2 \n 2.55 \n 3.10 \n 3.00 \n D \n D \n U \n 2.60 \n 3.20 \n 2.80 \n 1553414933 \n 1553549223 \n \n \n 52 \n EFEkIb54 \n 75 \n 2.40 \n 3.10 \n 2.94 \n D \n U \n U \n 2.59 \n 3.00 \n 2.76 \n 1553419600 \n 1553629445 \n \n \n 53 \n EFEkIb54 \n 147 \n 2.42 \n 3.00 \n 3.00 \n D \n N \n U \n 2.53 \n 3.00 \n 2.84 \n 1553419703 \n 1553629333 \n \n \n 54 \n EFEkIb54 \n 18 \n 2.41 \n 3.19 \n 3.37 \n D \n D \n U \n 2.67 \n 3.35 \n 2.80 \n 1553427181 \n 1553629466 \n \n \n 55 \n EFEkIb54 \n 390 \n 2.44 \n 3.22 \n 3.50 \n D \n D \n U \n 2.62 \n 3.26 \n 3.00 \n 1553467860 \n 1553629485 \n \n \n 56 \n EFEkIb54 \n 164 \n 2.58 \n 3.05 \n 3.05 \n U \n D \n U \n 2.52 \n 3.20 \n 3.00 \n 1553555049 \n 1553628534 \n \n \n
\n
"
+ "'http://api.clubelo.com/2015-01-02'"
+ ]
},
"metadata": {},
- "execution_count": 122
+ "execution_count": 5
}
],
"source": [
- "pd.DataFrame([bookies[x] for x in bookies])\n"
+ "d= datetime(2015, 1, 2)\n",
+ "'http://api.clubelo.com/{:%Y-%m-%d}'.format(d)"
]
},
{
"cell_type": "code",
- "execution_count": 127,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
- "from op_parser import OpParser\n",
+ "dates=[]\n",
+ "d= datetime(2015, 1, 4)\n",
+ "end_date= datetime(2021, 1, 10)\n",
"\n",
- "op=OpParser()\n",
- "op.parse_matches()"
+ "while d<=end_date:\n",
+ " r = requests.get('http://api.clubelo.com/{:%Y-%m-%d}'.format(d), allow_redirects=True)\n",
+ " open('data/elo/elo_{:%Y-%m-%d}.csv'.format(d), 'wb').write(r.content)\n",
+ " #time.sleep(random.uniform(1, 5))\n",
+ " d+=timedelta(days=1)\n",
+ " #break"
]
},
{
"cell_type": "code",
- "execution_count": 128,
+ "execution_count": 7,
"metadata": {},
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "[ mid bid w1 wx w2 move_1 move_x move_2 open_1 open_x \\\n",
- " 0 003ZibWr 1 1.90 3.20 3.40 N N N NaN NaN \n",
- " 1 003ZibWr 2 1.95 3.30 3.50 D U U 2.00 3.25 \n",
- " 2 003ZibWr 3 1.80 3.40 3.55 D U U 1.95 3.20 \n",
- " 3 003ZibWr 5 1.85 3.40 3.60 N N N 1.85 3.40 \n",
- " 4 003ZibWr 9 1.85 3.25 3.70 N N N NaN NaN \n",
- " 5 003ZibWr 14 1.85 3.32 3.71 D U U 1.94 3.24 \n",
- " 6 003ZibWr 15 1.80 3.50 4.00 N N N NaN NaN \n",
- " 7 003ZibWr 16 1.91 3.60 3.80 N U D 1.91 3.40 \n",
- " 8 003ZibWr 24 1.80 3.50 3.70 D U U 1.88 3.35 \n",
- " 9 003ZibWr 26 1.95 3.20 3.60 U D D 1.85 3.30 \n",
- " 10 003ZibWr 30 1.91 3.25 3.40 U N D 1.83 3.25 \n",
- " 11 003ZibWr 32 1.75 3.50 3.75 D U U 1.95 3.20 \n",
- " 12 003ZibWr 33 1.75 3.40 4.05 N N N NaN NaN \n",
- " 13 003ZibWr 43 1.78 3.45 3.65 D U U 1.85 3.30 \n",
- " 14 003ZibWr 44 2.02 3.45 4.09 U U U 1.01 1.01 \n",
- " 15 003ZibWr 46 1.80 3.40 3.83 N N N NaN NaN \n",
- " 16 003ZibWr 53 1.95 3.30 3.50 D U U 2.00 3.25 \n",
- " 17 003ZibWr 56 1.92 3.35 3.80 U U D 1.81 3.25 \n",
- " 18 003ZibWr 57 1.91 3.40 3.75 D U U 1.98 3.25 \n",
- " 19 003ZibWr 73 1.88 3.34 3.70 U N D 1.74 3.34 \n",
- " 20 003ZibWr 75 1.93 3.40 3.75 D U U 1.98 3.30 \n",
- " 21 003ZibWr 76 1.91 3.50 4.00 D U U 1.95 3.30 \n",
- " 22 003ZibWr 128 1.90 3.45 3.80 D U U 2.00 3.35 \n",
- " 23 003ZibWr 147 2.02 3.29 3.56 U D D 1.92 3.35 \n",
- " 24 003ZibWr 149 1.90 3.20 3.40 N N N NaN NaN \n",
- " 25 003ZibWr 157 1.85 3.40 3.60 N N N 1.85 3.40 \n",
- " \n",
- " open_2 time_close time_open \n",
- " 0 NaN 1345044717 NaN \n",
- " 1 3.40 1345281013 1.344847e+09 \n",
- " 2 3.25 1345194456 1.344867e+09 \n",
- " 3 3.60 1345278408 1.344996e+09 \n",
- " 4 NaN 1345022128 NaN \n",
- " 5 3.47 1345283951 1.344931e+09 \n",
- " 6 NaN 1345223766 NaN \n",
- " 7 4.00 1345280634 1.344939e+09 \n",
- " 8 3.55 1345163795 1.345081e+09 \n",
- " 9 3.90 1345279745 1.345024e+09 \n",
- " 10 3.75 1345283938 1.345030e+09 \n",
- " 11 3.50 1345210531 1.344946e+09 \n",
- " 12 NaN 1345240798 NaN \n",
- " 13 3.50 1345163835 1.345081e+09 \n",
- " 14 1.01 1345284037 1.344542e+09 \n",
- " 15 NaN 1345184204 NaN \n",
- " 16 3.40 1345280954 1.344847e+09 \n",
- " 17 4.15 1345283983 1.345201e+09 \n",
- " 18 3.40 1345284306 1.344933e+09 \n",
- " 19 4.51 1345282541 1.345274e+09 \n",
- " 20 3.65 1345284072 1.344937e+09 \n",
- " 21 3.60 1345282426 1.344933e+09 \n",
- " 22 3.50 1345283188 1.344932e+09 \n",
- " 23 3.84 1345283661 1.344973e+09 \n",
- " 24 NaN 1345044698 NaN \n",
- " 25 3.60 1345278302 1.344995e+09 ]"
- ]
- },
- "metadata": {},
- "execution_count": 128
- }
- ],
+ "outputs": [],
"source": [
- "op.DATA"
+ "def load_json(fn, did, headers, isft=0):\n",
+ " file_name='raw/{}_{}_{:%Y-%m-%d-%H%M}.json'.format(fn, did, datetime.now()) if fn=='votes' else f'raw/{fn}_{did}.json'\n",
+ " if not path.exists(file_name) or (fn=='votes' and isft==0):\n",
+ " script='' if fn=='event' else '/provider/1/'+fn if fn=='winning-odds' else '/'+fn\n",
+ " link=f'{api_url}event/{did}{script}'\n",
+ " r = requests.get(link, headers=headers)\n",
+ " if r.status_code==200:\n",
+ " with open(file_name, 'w+', encoding='utf8') as f:\n",
+ " f.write(r.text)"
]
},
{
@@ -1410,7 +2304,13 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "COUNTRIES=['england', 'france', 'greece', 'spain', 'italy', 'portugal', 'mexico', 'asia', 'scotland', 'netherlands', 'belgium', 'africa',\n",
+ " 'turkey', 'australia', 'argentina', 'germany', 'switzerland', 'poland', 'austria', 'europe', 'south-america', 'denmark',\n",
+ " 'ukraine', 'usa', 'russia', 'japan', 'bulgaria', 'lithuania', 'world', 'sweden', 'norway', 'romania', 'brazil', 'estonia',\n",
+ " 'slovakia', 'north-central-america', 'finland', 'serbia', 'slovenia', 'china', 'hungary', 'czech-republic', 'chile',\n",
+ " 'belarus', 'croatia', 'paraguay', 'cyprus', 'uruguay', 'ireland', 'colombia', 'south-korea', 'ecuador']\n"
+ ]
}
]
}
\ No newline at end of file
diff --git a/dl.py b/dl.py
index 53211e1..eb6b95d 100644
--- a/dl.py
+++ b/dl.py
@@ -4,23 +4,28 @@
from shutil import move
import pandas as pd
import numpy as np
-from datetime import datetime
-from data_provider import DataProvider
-from sofa_parser import SofaScoreParser
-from fbref_parser import FbrefParser
+from datetime import datetime,timedelta
+from api.data_provider import DataProvider
+from api.sofa_parser import SofaScoreParser
+from api.fbref_parser import FbrefParser
+from api.op_parser import OpParser
from tqdm import tqdm
dp=DataProvider()
if __name__ == '__main__':
- if len(sys.argv) == 3:
- ds=sys.argv[1]
- de=sys.argv[2]
+ if len(sys.argv) == 2:
+ ds=de='{:%Y-%m-%d}'.format(datetime.today()-timedelta(days=1))
+ elif len(sys.argv) == 3:
+ ds=de=sys.argv[2]
elif len(sys.argv) == 4:
ds=sys.argv[2]
de=sys.argv[3]
- elif len(sys.argv) == 2:
- ds=de=sys.argv[1]
+ else:
+ df=pd.read_csv('data/sofa/matches_done.csv')
+ ds=df.ts.max()[:10]
+ de='{:%Y-%m-%d}'.format(datetime.today()-timedelta(days=1))
+ #de='2020-12-02'
if sys.argv[1]=='d':
dp.load_days(ds, de)
@@ -34,8 +39,61 @@
elif sys.argv[1]=='fm':
dp.load_fbref_matches()
elif sys.argv[1]=='fd':
- ssp=SofaScoreParser()
- ssp.parse_matches()
+ dp.load_fbref_days(ds, de)
+ elif sys.argv[1]=='fdp':
+ p=FbrefParser()
+ p.parse_days()
+ elif sys.argv[1]=='fmp':
+ p=FbrefParser()
+ p.parse_matches()
+ elif sys.argv[1]=='od':
+ dp.load_op_days(ds, de)
+ elif sys.argv[1]=='odp':
+ p=OpParser()
+ p.parse_days()
elif sys.argv[1]=='om':
dp.load_op_matches()
-
\ No newline at end of file
+ elif sys.argv[1]=='a':
+ ssp=SofaScoreParser()
+ fbp=FbrefParser()
+ opp=OpParser()
+ print('*'*20)
+ print(' LOAD DAYS')
+ print('*'*20)
+ print('-'*5,' Sofa ','-'*5)
+ dp.load_days(ds, de)
+ print('-'*5,' Fbref ','-'*5)
+ dp.load_fbref_days(ds, de)
+ print('-'*5,' OP ','-'*5)
+ dp.load_op_days(ds, de)
+ print('-'*5,' ELO ','-'*5)
+ dp.load_elos(ds, de)
+
+ print('*'*20)
+ print(' PARSE DAYS')
+ print('*'*20)
+
+ print('-'*5,' Fbref ','-'*5)
+ fbp.parse_days()
+ print('-'*5,' OP ','-'*5)
+ opp.parse_days()
+
+ print('*'*20)
+ print(' LOAD MATCHES')
+ print('*'*20)
+ print('-'*5,' Sofa ','-'*5)
+ dp.load_matches()
+ print('-'*5,' Fbref ','-'*5)
+ dp.load_fbref_matches()
+ print('-'*5,' OP ','-'*5)
+ dp.load_op_matches()
+
+ print('*'*20)
+ print(' PARSE MATCHES')
+ print('*'*20)
+ print('-'*5,' Sofa ','-'*5)
+ ssp.parse_matches()
+ print('-'*5,' Fbref ','-'*5)
+ fbp.parse_matches()
+ print('-'*5,' OP ','-'*5)
+ opp.parse_matches()
\ No newline at end of file
diff --git a/models/1024.keras b/models/1024.keras
new file mode 100644
index 0000000..b6e52d0
Binary files /dev/null and b/models/1024.keras differ
diff --git a/models/512-1024-1024-512.keras b/models/512-1024-1024-512.keras
new file mode 100644
index 0000000..21cc816
Binary files /dev/null and b/models/512-1024-1024-512.keras differ
diff --git a/models/512-1024-512.keras b/models/512-1024-512.keras
new file mode 100644
index 0000000..60bb76a
Binary files /dev/null and b/models/512-1024-512.keras differ
diff --git a/models/512-1024-8roi-welltrained.keras b/models/512-1024-8roi-welltrained.keras
new file mode 100644
index 0000000..1c3e21f
Binary files /dev/null and b/models/512-1024-8roi-welltrained.keras differ
diff --git a/models/512-1024.keras b/models/512-1024.keras
new file mode 100644
index 0000000..4205024
Binary files /dev/null and b/models/512-1024.keras differ
diff --git a/models/512-2048-10roi.keras b/models/512-2048-10roi.keras
new file mode 100644
index 0000000..1a1a49b
Binary files /dev/null and b/models/512-2048-10roi.keras differ
diff --git a/models/op_1024_512_64_16.keras b/models/op_1024_512_64_16.keras
new file mode 100644
index 0000000..8d2df83
Binary files /dev/null and b/models/op_1024_512_64_16.keras differ
diff --git a/models/op_1024_512_64_16_changedDrift.keras b/models/op_1024_512_64_16_changedDrift.keras
new file mode 100644
index 0000000..2aa31c6
Binary files /dev/null and b/models/op_1024_512_64_16_changedDrift.keras differ
diff --git a/op.ipynb b/op.ipynb
new file mode 100644
index 0000000..f66da54
--- /dev/null
+++ b/op.ipynb
@@ -0,0 +1,541 @@
+{
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5-final"
+ },
+ "orig_nbformat": 2,
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.5 64-bit ('mlenv': conda)",
+ "metadata": {
+ "interpreter": {
+ "hash": "12f2fd9a8da6c9ddda222d67ff20ee53b82617d5a9ac88eb47f60b586ce1b05e"
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n %reload_ext autoreload\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import pickle\n",
+ "from datetime import datetime,timedelta\n",
+ "from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler\n",
+ "from sklearn.model_selection import RepeatedKFold,train_test_split\n",
+ "import tensorflow as tf\n",
+ "from tensorflow import keras\n",
+ "from tensorflow.keras.layers import Dense, BatchNormalization,Dropout\n",
+ "from tensorflow.keras.models import Sequential\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "import api.util\n",
+ "from api.predictions_converter import PredictionsConverter\n",
+ "from api.op_dp import OpDataProvider\n",
+ "\n",
+ "from IPython.display import display\n",
+ "pd.options.display.max_columns = None\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "classes=['winner_away', 'winner_draw', 'winner_home']\n",
+ "dp=OpDataProvider(load=False, exclude=['country_id'])\n",
+ "data, labels, info, df=dp.provide_data()\n",
+ "#df=dp._load_data()\n",
+ "#df=dp._provide_odds()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " bn country ds liga mid \\\n",
+ "0 0.461538 greece 2014-01-18 15:15:00+00:00 super-league xzLYjsqg \n",
+ "1 0.461538 spain 2013-02-16 17:00:00+00:00 segunda-division M5Tqms6i \n",
+ "2 0.461538 germany 2013-09-28 13:30:00+00:00 bundesliga EZp8Xc0a \n",
+ "3 0.846154 france 2019-01-13 20:00:00+00:00 ligue-1 UJjJ4QAp \n",
+ "4 0.538462 germany 2015-02-01 14:30:00+00:00 bundesliga G25l1ArF \n",
+ "\n",
+ " odds_away odds_draw odds_home sc1 sc2 t1 \\\n",
+ "0 27.83 11.00 1.06 2 0 olympiacos piraeus \n",
+ "1 2.47 3.16 2.85 2 2 guadalajara \n",
+ "2 17.23 8.23 1.15 1 0 bayern munich \n",
+ "3 4.42 3.91 1.78 1 1 marseille \n",
+ "4 3.35 3.45 2.21 2 0 werder bremen \n",
+ "\n",
+ " t2 tid1 tid2 winner oddsprob_home oddsprob_draw \\\n",
+ "0 levadiakos 0 506 home 0.881373 0.085437 \n",
+ "1 almeria 1 1444 draw 0.333431 0.298337 \n",
+ "2 wolfsburg 2 64 home 0.824268 0.117686 \n",
+ "3 monaco 3 399 draw 0.535805 0.249926 \n",
+ "4 hertha berlin 4 420 home 0.428344 0.277868 \n",
+ "\n",
+ " oddsprob_away drift_home drift_away drift_draw \n",
+ "0 0.033191 -0.072516 0.139235 0.086659 \n",
+ "1 0.368232 -0.055178 0.054568 -0.016432 \n",
+ "2 0.058046 -0.016230 0.091752 0.039680 \n",
+ "3 0.214269 0.097888 -0.233988 -0.080705 \n",
+ "4 0.293787 -0.098189 0.105136 0.010880 "
+ ],
+ "text/html": "
\n\n
\n \n \n \n bn \n country \n ds \n liga \n mid \n odds_away \n odds_draw \n odds_home \n sc1 \n sc2 \n t1 \n t2 \n tid1 \n tid2 \n winner \n oddsprob_home \n oddsprob_draw \n oddsprob_away \n drift_home \n drift_away \n drift_draw \n \n \n \n \n 0 \n 0.461538 \n greece \n 2014-01-18 15:15:00+00:00 \n super-league \n xzLYjsqg \n 27.83 \n 11.00 \n 1.06 \n 2 \n 0 \n olympiacos piraeus \n levadiakos \n 0 \n 506 \n home \n 0.881373 \n 0.085437 \n 0.033191 \n -0.072516 \n 0.139235 \n 0.086659 \n \n \n 1 \n 0.461538 \n spain \n 2013-02-16 17:00:00+00:00 \n segunda-division \n M5Tqms6i \n 2.47 \n 3.16 \n 2.85 \n 2 \n 2 \n guadalajara \n almeria \n 1 \n 1444 \n draw \n 0.333431 \n 0.298337 \n 0.368232 \n -0.055178 \n 0.054568 \n -0.016432 \n \n \n 2 \n 0.461538 \n germany \n 2013-09-28 13:30:00+00:00 \n bundesliga \n EZp8Xc0a \n 17.23 \n 8.23 \n 1.15 \n 1 \n 0 \n bayern munich \n wolfsburg \n 2 \n 64 \n home \n 0.824268 \n 0.117686 \n 0.058046 \n -0.016230 \n 0.091752 \n 0.039680 \n \n \n 3 \n 0.846154 \n france \n 2019-01-13 20:00:00+00:00 \n ligue-1 \n UJjJ4QAp \n 4.42 \n 3.91 \n 1.78 \n 1 \n 1 \n marseille \n monaco \n 3 \n 399 \n draw \n 0.535805 \n 0.249926 \n 0.214269 \n 0.097888 \n -0.233988 \n -0.080705 \n \n \n 4 \n 0.538462 \n germany \n 2015-02-01 14:30:00+00:00 \n bundesliga \n G25l1ArF \n 3.35 \n 3.45 \n 2.21 \n 2 \n 0 \n werder bremen \n hertha berlin \n 4 \n 420 \n home \n 0.428344 \n 0.277868 \n 0.293787 \n -0.098189 \n 0.105136 \n 0.010880 \n \n \n
\n
"
+ },
+ "metadata": {},
+ "execution_count": 150
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "source": [
+ "# Analysis"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
+ "data_train, data_test, labels_train, labels_test, info_train, info_test = train_test_split(data, labels, info, test_size=0.2, random_state=42)\n",
+ "print(data_train.shape, data_test.shape)"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 51,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(119680, 4) (29921, 4)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "source": [
+ "df.isnull().any()"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
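+ "- **Prevalence** is the proportion of cases that are actually positive, i.e. the share of matches in which the given outcome actually occurred.\n",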
+ "- **Sensitivity** is the probability that our test outputs positive given that the case is actually positive.\n",
+ "- **Specificity** is the probability that the test outputs negative given that the case is actually negative.\n",
+ "- **Positive predictive value (PPV)** is the probability that a case with a positive prediction is actually positive (the predicted outcome truly wins).\n",
+ "- **Negative predictive value (NPV)** is the probability that a case with a negative prediction is actually negative (the outcome truly does not win).\n",
+ "- **The area under the ROC curve** is also called AUC-ROC or the C-statistic and is a measure of goodness of fit.\n",
+ "- **F1 score** is the harmonic mean of the precision and recall, where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0."
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_model(n_inputs, n_outputs):\n",
+ " model = Sequential()\n",
+ " model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dropout(0.2))\n",
+ " model.add(Dense(512, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dropout(0.2))\n",
+ " model.add(Dense(64, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(16, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(n_outputs, activation='softmax'))\n",
+ " model.compile(loss='binary_crossentropy', optimizer='adam')\n",
+ " return model\n",
+ "\n",
+ "def evaluate_model(X, y):\n",
+ " results = list()\n",
+ " n_inputs, n_outputs = X.shape[1], y.shape[1]\n",
+ " # define evaluation procedure\n",
+ " cv = RepeatedKFold(n_splits=3, n_repeats=2, random_state=1)\n",
+ " # enumerate folds\n",
+ " for train_ix, test_ix in cv.split(X):\n",
+ " # prepare data\n",
+ " X_train, X_test = X[train_ix], X[test_ix]\n",
+ " y_train, y_test = y[train_ix], y[test_ix]\n",
+ " # define model\n",
+ " model = get_model(n_inputs, n_outputs)\n",
+ " # fit model\n",
+ " model.fit(X_train, y_train, epochs=30)\n",
+ " # make a prediction on the test set\n",
+ " yhat = model.predict(X_test)\n",
+ " # round probabilities to class labels\n",
+ " yhat = yhat.round()\n",
+ " # calculate accuracy\n",
+ " acc = accuracy_score(y_test, yhat)\n",
+ " # store result\n",
+ " print('>%.3f' % acc)\n",
+ " results.append(acc)\n",
+ " break\n",
+ " return results, model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.6088\n",
+ "Epoch 2/30\n",
+ "2494/2494 [==============================] - 35s 14ms/step - loss: 0.5917\n",
+ "Epoch 3/30\n",
+ "2494/2494 [==============================] - 36s 15ms/step - loss: 0.5888\n",
+ "Epoch 4/30\n",
+ "2494/2494 [==============================] - 35s 14ms/step - loss: 0.5884\n",
+ "Epoch 5/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5865\n",
+ "Epoch 6/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5847\n",
+ "Epoch 7/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5856\n",
+ "Epoch 8/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5840\n",
+ "Epoch 9/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5841\n",
+ "Epoch 10/30\n",
+ "2494/2494 [==============================] - 36s 14ms/step - loss: 0.5846\n",
+ "Epoch 11/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5851\n",
+ "Epoch 12/30\n",
+ "2494/2494 [==============================] - 36s 14ms/step - loss: 0.5839\n",
+ "Epoch 13/30\n",
+ "2494/2494 [==============================] - 34s 14ms/step - loss: 0.5841\n",
+ "Epoch 14/30\n",
+ "2494/2494 [==============================] - 34s 14ms/step - loss: 0.5846\n",
+ "Epoch 15/30\n",
+ "2494/2494 [==============================] - 34s 14ms/step - loss: 0.5823\n",
+ "Epoch 16/30\n",
+ "2494/2494 [==============================] - 34s 13ms/step - loss: 0.5833\n",
+ "Epoch 17/30\n",
+ "2494/2494 [==============================] - 32s 13ms/step - loss: 0.5829\n",
+ "Epoch 18/30\n",
+ "2494/2494 [==============================] - 32s 13ms/step - loss: 0.5834\n",
+ "Epoch 19/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5826\n",
+ "Epoch 20/30\n",
+ "2494/2494 [==============================] - 34s 14ms/step - loss: 0.5836\n",
+ "Epoch 21/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.5837\n",
+ "Epoch 22/30\n",
+ "2494/2494 [==============================] - 34s 14ms/step - loss: 0.5827\n",
+ "Epoch 23/30\n",
+ "2494/2494 [==============================] - 33s 13ms/step - loss: 0.5813\n",
+ "Epoch 24/30\n",
+ "2494/2494 [==============================] - 31s 12ms/step - loss: 0.5837\n",
+ "Epoch 25/30\n",
+ "2494/2494 [==============================] - 32s 13ms/step - loss: 0.5842\n",
+ "Epoch 26/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.5824\n",
+ "Epoch 27/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.5823\n",
+ "Epoch 28/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.5814\n",
+ "Epoch 29/30\n",
+ "2494/2494 [==============================] - 37s 15ms/step - loss: 0.5816\n",
+ "Epoch 30/30\n",
+ "2494/2494 [==============================] - 38s 15ms/step - loss: 0.5822\n",
+ ">0.388\n",
+ "Accuracy: 0.388 (0.000)\n"
+ ]
+ }
+ ],
+ "source": [
+ "#results = evaluate_model(data, labels)\n",
+ "results, model = evaluate_model(data_train, labels_train)\n",
+ "\n",
+ "# summarize performance\n",
+ "print('Accuracy: %.3f (%.3f)' % (np.mean(results), np.std(results)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yhat = model.predict(data_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 348,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#model.save('models/op_1024_512_64_16_changedDrift.keras')\n",
+ "model = keras.models.load_model('models/op_1024_512_64_16_changedDrift.keras')\n",
+ "yhat = model.predict(data_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 349,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from IPython.display import display\n",
+ "class PredictionsConverter:\n",
+ " def __init__(self, provider, yhat, y, info):\n",
+ " self.CLASSES=['HOME','DRAW','AWAY']\n",
+ " self.DATA_PATH=f'predictions/{provider}/'\n",
+ " self.LABELS_PREDICTED=yhat\n",
+ " self.LABELS=y\n",
+ " self.INFO=info.copy()\n",
+ "\n",
+ " def make_df(self, threshold=0.5):\n",
+ " df_yhat=pd.DataFrame(data=self.LABELS_PREDICTED, columns=['prob_away', 'prob_draw', 'prob_home'])\n",
+ " df_y=pd.DataFrame(data=self.LABELS, columns=['winner_away', 'winner_draw', 'winner_home'])\n",
+ " df_i=self.INFO.reset_index(drop=True)\n",
+ " df_preds=pd.concat([df_i,df_y,df_yhat], axis=1)\n",
+ " if threshold=='max':\n",
+ " a=df_yhat.rank(method='max', axis=1)\n",
+ " df_preds['pred_home']=a['prob_home'].apply(lambda x: 1 if x>2 else 0)\n",
+ " df_preds['pred_draw']=a['prob_draw'].apply(lambda x: 1 if x>2 else 0)\n",
+ " df_preds['pred_away']=a['prob_away'].apply(lambda x: 1 if x>2 else 0)\n",
+ " else:\n",
+ " df_preds['pred_home']=np.where(df_preds['prob_home']>threshold,1,0)\n",
+ " df_preds['pred_draw']=np.where(df_preds['prob_draw']>threshold,1,0)\n",
+ " df_preds['pred_away']=np.where(df_preds['prob_away']>threshold,1,0)\n",
+ " df_preds=df_preds[(df_preds['pred_home']==1) | (df_preds['pred_draw']==1) |(df_preds['pred_away']==1)]\n",
+ " df_preds['winner_home']=df_preds['winner_home'].astype(int)\n",
+ " df_preds['winner_draw']=df_preds['winner_draw'].astype(int)\n",
+ " df_preds['winner_away']=df_preds['winner_away'].astype(int)\n",
+ " df_preds['pred_home']=df_preds['pred_home'].astype(int)\n",
+ " df_preds['pred_draw']=df_preds['pred_draw'].astype(int)\n",
+ " df_preds['pred_away']=df_preds['pred_away'].astype(int)\n",
+ " df_preds['win']=0\n",
+ " df_preds.loc[(df_preds['winner_home']==df_preds['pred_home']) & (df_preds['winner_home']==1),'win']=1\n",
+ " df_preds.loc[(df_preds['winner_draw']==df_preds['pred_draw']) & (df_preds['winner_draw']==1),'win']=1\n",
+ " df_preds.loc[(df_preds['winner_away']==df_preds['pred_away']) & (df_preds['winner_away']==1),'win']=1\n",
+ " df_preds.loc[df_preds['pred_home']==1,'odds']=df_preds['odds_home']\n",
+ " df_preds.loc[df_preds['pred_draw']==1,'odds']=df_preds['odds_draw']\n",
+ " df_preds.loc[df_preds['pred_away']==1,'odds']=df_preds['odds_away']\n",
+ " df_preds=df_preds.drop_duplicates()\n",
+ " df_preds['prf']=np.where(df_preds.win>0,df_preds.odds-1, -1)\n",
+ " self.Y=df_preds[['winner_home','winner_draw','winner_away']].values\n",
+ " self.YHAT=df_preds[['pred_home','pred_draw','pred_away']].values\n",
+ " self.DF=df_preds[['ds', 'country', 'liga', 't1', 't2', 'sc1', 'sc2', 'odds_home', 'odds_draw', 'odds_away','winner_home', 'winner_draw', 'winner_away','pred_home','pred_draw','pred_away','prob_home', 'prob_draw', 'prob_away','win','prf']]\n",
+ " \n",
+ " def performance_metrics(self):\n",
+ " display(util.get_performance_metrics(self.Y, self.YHAT, self.CLASSES))\n",
+ " \n",
+ " def graph(self,mode='tpfp'):\n",
+ " if mode == 'tpfp':\n",
+ " util.get_curve(self.Y, self.YHAT, self.CLASSES)\n",
+ " elif mode== 'prc':\n",
+ " util.get_curve(self.Y, self.YHAT, self.CLASSES, curve='prc')\n",
+ " \n",
+ " def profit(self):\n",
+ " print(self.DF.win.mean(), self.DF.prf.sum(), self.DF.prf.sum()/len(self.DF.index),len(self.DF.index))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def odds2prob(df):\n",
+ " df['odds_away']=1/df['odds_away']\n",
+ " df['odds_draw']=1/df['odds_draw']\n",
+ " df['odds_home']=1/df['odds_home']\n",
+ " df['margin']=df[['odds_away','odds_draw','odds_home']].sum(axis=1)\n",
+ " df['odds_away']=df['odds_away']/df['margin']\n",
+ " df['odds_draw']=df['odds_draw']/df['margin']\n",
+ " df['odds_home']=df['odds_home']/df['margin']\n",
+ " return df[['odds_away','odds_draw','odds_home']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0.6513761467889908 -311.53 -0.024017423483154728 12971\n0.5670935008456148 -619.1300000000001 -0.029916888137231222 20695\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 6512 2445 3540 474 0.691 0.539 0.932 \n1 DRAW 3 10363 4 2601 0.799 0.201 0.001 \n2 AWAY 1934 8612 978 1447 0.813 0.261 0.572 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.409 0.648 0.838 0.670 0.764 0.5 \n1 1.000 0.429 0.799 0.500 0.002 0.5 \n2 0.898 0.664 0.856 0.735 0.615 0.5 ",
+ "text/html": "
\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 6512 \n 2445 \n 3540 \n 474 \n 0.691 \n 0.539 \n 0.932 \n 0.409 \n 0.648 \n 0.838 \n 0.670 \n 0.764 \n 0.5 \n \n \n 1 \n DRAW \n 3 \n 10363 \n 4 \n 2601 \n 0.799 \n 0.201 \n 0.001 \n 1.000 \n 0.429 \n 0.799 \n 0.500 \n 0.002 \n 0.5 \n \n \n 2 \n AWAY \n 1934 \n 8612 \n 978 \n 1447 \n 0.813 \n 0.261 \n 0.572 \n 0.898 \n 0.664 \n 0.856 \n 0.735 \n 0.615 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 9216 3419 7180 880 0.611 0.488 0.913 \n1 DRAW 1 15980 1 4713 0.772 0.228 0.000 \n2 AWAY 2519 13032 1778 3366 0.751 0.284 0.428 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.323 0.562 0.795 0.618 0.696 0.5 \n1 1.000 0.500 0.772 0.500 0.000 0.5 \n2 0.880 0.586 0.795 0.654 0.495 0.5 ",
+ "text/html": "
\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 9216 \n 3419 \n 7180 \n 880 \n 0.611 \n 0.488 \n 0.913 \n 0.323 \n 0.562 \n 0.795 \n 0.618 \n 0.696 \n 0.5 \n \n \n 1 \n DRAW \n 1 \n 15980 \n 1 \n 4713 \n 0.772 \n 0.228 \n 0.000 \n 1.000 \n 0.500 \n 0.772 \n 0.500 \n 0.000 \n 0.5 \n \n \n 2 \n AWAY \n 2519 \n 13032 \n 1778 \n 3366 \n 0.751 \n 0.284 \n 0.428 \n 0.880 \n 0.586 \n 0.795 \n 0.654 \n 0.495 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "conv_bookies=PredictionsConverter('op', odds2prob(info_test.copy()).values, labels_test, info_test.copy())\n",
+ "conv_bookies.make_df()\n",
+ "conv=PredictionsConverter('op', yhat, labels_test, info_test.copy())\n",
+ "conv.make_df()\n",
+ "\n",
+ "conv_bookies.profit()\n",
+ "conv.profit()\n",
+ "conv_bookies.performance_metrics()\n",
+ "conv.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0.5259534075336743 -1171.42 -0.03915304655904275 29919\n0.5121494702363046 -1210.19 -0.04044887863899194 29919\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 11350 6460 9881 2228 0.595 0.454 0.836 \n1 DRAW 23 22561 30 7305 0.755 0.245 0.003 \n2 AWAY 4363 16568 4338 4650 0.700 0.301 0.484 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.395 0.535 0.744 0.616 0.652 0.5 \n1 0.999 0.434 0.755 0.501 0.006 0.5 \n2 0.792 0.501 0.781 0.638 0.493 0.5 ",
+ "text/html": "
\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 11350 \n 6460 \n 9881 \n 2228 \n 0.595 \n 0.454 \n 0.836 \n 0.395 \n 0.535 \n 0.744 \n 0.616 \n 0.652 \n 0.5 \n \n \n 1 \n DRAW \n 23 \n 22561 \n 30 \n 7305 \n 0.755 \n 0.245 \n 0.003 \n 0.999 \n 0.434 \n 0.755 \n 0.501 \n 0.006 \n 0.5 \n \n \n 2 \n AWAY \n 4363 \n 16568 \n 4338 \n 4650 \n 0.700 \n 0.301 \n 0.484 \n 0.792 \n 0.501 \n 0.781 \n 0.638 \n 0.493 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 11115 6448 9893 2463 0.587 0.454 0.819 \n1 DRAW 2 22589 2 7326 0.755 0.245 0.000 \n2 AWAY 4206 16205 4701 4807 0.682 0.301 0.467 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.395 0.529 0.724 0.607 0.643 0.5 \n1 1.000 0.500 0.755 0.500 0.001 0.5 \n2 0.775 0.472 0.771 0.621 0.469 0.5 ",
+ "text/html": "
\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 11115 \n 6448 \n 9893 \n 2463 \n 0.587 \n 0.454 \n 0.819 \n 0.395 \n 0.529 \n 0.724 \n 0.607 \n 0.643 \n 0.5 \n \n \n 1 \n DRAW \n 2 \n 22589 \n 2 \n 7326 \n 0.755 \n 0.245 \n 0.000 \n 1.000 \n 0.500 \n 0.755 \n 0.500 \n 0.001 \n 0.5 \n \n \n 2 \n AWAY \n 4206 \n 16205 \n 4701 \n 4807 \n 0.682 \n 0.301 \n 0.467 \n 0.775 \n 0.472 \n 0.771 \n 0.621 \n 0.469 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "conv_bookies1=PredictionsConverter('op', odds2prob(info_test.copy()).values, labels_test, info_test.copy())\n",
+ "conv_bookies1.make_df(threshold='max')\n",
+ "conv1=PredictionsConverter('op', yhat, labels_test, info_test.copy())\n",
+ "conv1.make_df(threshold='max')\n",
+ "\n",
+ "conv_bookies1.profit()\n",
+ "conv1.profit()\n",
+ "conv_bookies1.performance_metrics()\n",
+ "conv1.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 238,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": "
",
+ "image/svg+xml": "\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-01-10T23:33:32.949500 \r\n image/svg+xml \r\n \r\n \r\n Matplotlib v3.3.2, https://matplotlib.org/ \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n",
+ "image/png": "\n"
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ],
+ "source": [
+ "conv.graph(mode='prc')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res.drop_duplicates().to_csv('data/opres.csv', index=False)"
+ ]
+ },
+ {
+ "source": [
+ "## Data manipulations"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "COUNTRIES=['england', 'france', 'greece', 'spain', 'italy', 'portugal', 'mexico', 'asia', 'scotland', 'netherlands', 'belgium', \n",
+ " 'turkey', 'argentina', 'germany', 'switzerland', 'poland', 'austria', 'europe', 'south-america', 'denmark',\n",
+ " 'ukraine', 'usa', 'russia', 'japan', 'bulgaria', 'lithuania', 'sweden', 'norway', 'romania', 'brazil', 'estonia',\n",
+ " 'slovakia', 'north-central-america', 'finland', 'serbia', 'slovenia', 'china', 'hungary', 'czech-republic', 'chile',\n",
+ " 'belarus', 'croatia', 'paraguay', 'cyprus', 'uruguay', 'ireland', 'colombia', 'south-korea', 'ecuador']\n",
+ "df1=pd.read_csv('data/op/matches.csv', index_col=None)\n",
+ "df1=df1[df1.country.isin(COUNTRIES)]\n",
+ "df1.drop_duplicates().to_csv('data/op/matches.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/pred.ipynb b/pred.ipynb
new file mode 100644
index 0000000..403c10f
--- /dev/null
+++ b/pred.ipynb
@@ -0,0 +1,905 @@
+{
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5-final"
+ },
+ "orig_nbformat": 2,
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.5 64-bit ('mlenv': conda)",
+ "metadata": {
+ "interpreter": {
+ "hash": "12f2fd9a8da6c9ddda222d67ff20ee53b82617d5a9ac88eb47f60b586ce1b05e"
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import pickle\n",
+ "\n",
+ "from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler\n",
+ "from sklearn.model_selection import RepeatedKFold,train_test_split\n",
+ "import shap\n",
+ "import tensorflow as tf\n",
+ "from tensorflow import keras\n",
+ "import tensorflow.keras.backend \n",
+ "from tensorflow.keras.layers import Dense, BatchNormalization,Dropout\n",
+ "from tensorflow.keras.models import Sequential\n",
+ "from tensorflow.keras.callbacks import ModelCheckpoint\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "\n",
+ "import api.util\n",
+ "from api.predictions_converter import PredictionsConverter\n",
+ "from api.sofa_dp import SofaDataProvider\n",
+ "\n",
+ "from IPython.display import display\n",
+ "pd.options.display.max_columns = None\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def checkpoint(name):\n",
+ " return ModelCheckpoint(f'checkpoints/model_{name}.hdf5', monitor='val_acc', verbose=0, save_best_only=True, mode='max')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_all=pd.read_csv('data/stats_generated.csv', index_col=None)\n",
+ "df_all['vop1']=df_all['vote1']-df_all['oddsprob_home']\n",
+ "df_all['vopx']=df_all['votex']-df_all['oddsprob_draw']\n",
+ "df_all['vop2']=df_all['vote2']-df_all['oddsprob_away']\n"
+ ]
+ },
+ {
+ "source": [
+ "list(df_all.columns)"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "includes=['vote1', 'votex', 'vote2','elo1','elo2','drift_home', 'drift_away', 'drift_draw','oddsprob_home', 'oddsprob_draw', 'oddsprob_away']\n",
+ "cols=[x for x in df_all.columns if x!='round.1' and x!='ds.1']\n",
+ "cols=[x for x in cols if not any(i in x for i in includes)]\n",
+ "cols=[x for x in cols if not 'ht1' in x and not 'ht2' in x and not 'ps_ht' in x]\n",
+ "cols=[x for x in cols if not 'tar_' in x and not 'opp_' in x]\n",
+ "#cols1=[x for x in df_all.columns if '_form' in x]\n",
+ "cols=cols+includes\n"
+ ]
+ },
+ {
+ "source": [
+ "list(cols)"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['tar_w2_tt_avg', 'tar_ft1_tt_avg', 'tar_ft2_tt_avg', 'tar_ps_ft_tt_avg',\n",
+ " 'tar_oddsprob_home_tt_avg', 'tar_oddsprob_draw_tt_avg',\n",
+ " 'tar_oddsprob_away_tt_avg', 'tar_w1_tt_form', 'tar_wx_tt_form',\n",
+ " 'tar_w2_tt_form',\n",
+ " ...\n",
+ " 'vote1', 'votex', 'vote2', 'elo1', 'elo2', 'drift_home', 'drift_away',\n",
+ " 'drift_draw', 'sch', 'sca'],\n",
+ " dtype='object', length=121)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 95
+ }
+ ],
+ "source": [
+ "df.columns[start:]"
+ ]
+ },
+ {
+ "source": [
+ "nulls=pd.DataFrame(df.isna().sum(), columns=['n'])\n",
+ "#nulls[nulls.n>10000].to_csv('data/nulls.csv')\n",
+ "nulls[nulls.n>10000].index"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 96,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index([], dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 96
+ }
+ ]
+ },
+ {
+ "source": [
+ "cols"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
+ "df[df.tid1==594].to_csv('data/ttt1.csv', index=False)\n",
+ "df[df.tid1==1499].to_csv('data/ttt2.csv', index=False)\n",
+ "df[df.mid==9270007].to_csv('data/ttt.csv', index=False)"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "((135580, 182), (135580, 30))"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 47
+ }
+ ],
+ "source": [
+ "COL_CUR=['side', 'country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']\n",
+ "COL_PREV=['w1', 'wx', 'w2', 'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft']\n",
+ "COL_CAT=['country_id','form1', 'form2']\n",
+ "COL_BIN=['side']\n",
+ "COL_INF=['country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2', 'ft1', 'ft2','winner','odds_away','odds_draw','odds_home']\n",
+ "\n",
+ "\n",
+ "#df=pd.read_csv('data/stats_generated.csv', index_col=None)\n",
+ "start=29\n",
+ "df=df_all[cols]\n",
+ "df['elo1'].fillna((df['elo1'].mean()), inplace=True)\n",
+ "df['elo2'].fillna((df['elo2'].mean()), inplace=True)\n",
+ "nulls=pd.DataFrame(df.isna().sum(), columns=['n'])\n",
+ "cols_null=[x for x in nulls[nulls.n>60000].index if x not in COL_INF]\n",
+ "cols=[x for x in cols if x not in cols_null]\n",
+ "\n",
+ "\n",
+ "for col in COL_INF:\n",
+ " df.loc[df[col].isnull(),col]=0\n",
+ "\n",
+ "#df=df.dropna()\n",
+ "df=df.fillna(0)\n",
+ "#[df[col].fillna(df[col].mean(), inplace=True) for col in df.columns[start:]]\n",
+ "\n",
+ "scaler=MinMaxScaler()\n",
+ "nums=scaler.fit_transform(df[df.columns[start:]].values)\n",
+ "nums_df=pd.DataFrame(nums, columns=df.columns[start:])\n",
+ "df.reset_index(drop=True, inplace=True)\n",
+ "df=pd.concat([df[df.columns[:start]],nums_df], axis=1)\n",
+ "\n",
+ "nums1=df[['vote1', 'votex', 'vote2',]].values\n",
+ "\n",
+ "df_info=df[COL_INF]\n",
+ "\n",
+ "encoder = OneHotEncoder()\n",
+ "pop_r=encoder.fit_transform(df[['pop_r']]).toarray()\n",
+ "#side=df[['side']].values\n",
+ "rounds=encoder.fit_transform(df[['round']]).toarray()\n",
+ "countries=encoder.fit_transform(df[['country_id']]).toarray()\n",
+ "encoder = OneHotEncoder()\n",
+ "form1=encoder.fit_transform(df[['form1']]).toarray()\n",
+ "encoder = OneHotEncoder()\n",
+ "form2=encoder.fit_transform(df[['form2']]).toarray()\n",
+ "side=df[['side']].values\n",
+ "\n",
+ "#data=np.hstack([nums,nums1,pop_r,rounds,countries,form1,form2])\n",
+ "data=np.hstack([nums,nums1,side,pop_r,rounds,countries,form1,form2])\n",
+ "\n",
+ "df['gd']=df['ft1']-df['ft2']\n",
+ "df['gd']=np.where(df['gd']>5,6,df['gd'])\n",
+ "df['gd']=np.where(df['gd']<-5,-6,df['gd'])\n",
+ "scgd=pd.get_dummies(df['gd'], prefix='gd')\n",
+ "\n",
+ "df['sch']=np.where(df['ft1']>5,6,df['ft1'])\n",
+ "df['sca']=np.where(df['ft2']>5,6,df['ft2'])\n",
+ "sch=pd.get_dummies(df['sch'], prefix='sch')\n",
+ "sca=pd.get_dummies(df['sca'], prefix='sca')\n",
+ "labels=np.hstack([df[['w1', 'wx', 'w2']].values,sch.values,sca.values,scgd.values])\n",
+ "#labels=np.hstack([scgd.values])\n",
+ "#labels=df[['w1', 'wx', 'w2']].values\n",
+ "\n",
+ "data.shape,labels.shape"
+ ]
+ },
+ {
+ "source": [
+ "# Analysis"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
+ "data_train, data_test, labels_train, labels_test, info_train, info_test = train_test_split(data, labels, df_info, test_size=0.2, random_state=42)\n",
+ "print(data_train.shape, data_test.shape)"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 48,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(108464, 182) (27116, 182)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([[1, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 1, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 1, ..., 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 1, ..., 0, 0, 0],\n",
+ " [1, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 1, ..., 0, 0, 0]], dtype=int64)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 168
+ }
+ ],
+ "source": [
+ "labels_train"
+ ]
+ },
+ {
+ "source": [
+ "df.isnull().any()"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_model(n_inputs, n_outputs):\n",
+ " model = Sequential()\n",
+ " #model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(512, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(BatchNormalization())\n",
+ " #model.add(Dropout(0.4))\n",
+ " model.add(Dense(512, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dropout(0.4))\n",
+ " #model.add(Dense(64, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dense(n_outputs, activation='softmax'))\n",
+ " model.add(Dense(n_outputs, activation='sigmoid'))\n",
+ " #model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['accuracy'])\n",
+ " model.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy'])\n",
+ " #model.compile(loss='categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])\n",
+ " return model\n",
+ "\n",
+ "def evaluate_model(X, y, bs=64):\n",
+ " results = list()\n",
+ " n_inputs, n_outputs = X.shape[1], y.shape[1]\n",
+ " # define evaluation procedure\n",
+ " cv = RepeatedKFold(n_splits=3, n_repeats=2, random_state=1)\n",
+ " # enumerate folds\n",
+ " for train_ix, val_ix in cv.split(X):\n",
+ " # prepare data\n",
+ " X_train, X_val = X[train_ix], X[val_ix]\n",
+ " y_train, y_val = y[train_ix], y[val_ix]\n",
+ " # define model\n",
+ " model = get_model(n_inputs, n_outputs)\n",
+ " # fit model\n",
+ " model.fit(X_train, y_train, batch_size = bs, epochs=30)\n",
+ " # make a prediction on the held-out validation fold\n",
+ " yhat = model.predict(X_val)\n",
+ " # round probabilities to class labels\n",
+ " yhat = yhat.round()\n",
+ " # calculate accuracy\n",
+ " acc = accuracy_score(y_val, yhat)\n",
+ " # store result\n",
+ " print('>%.3f' % acc)\n",
+ " results.append(acc)\n",
+ " #break\n",
+ " return results, model"
+ ]
+ },
+ {
+ "source": [
+ "model.save('models/512-1024-1024-512.keras')"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.3145 - accuracy: 0.4463\n",
+ "Epoch 2/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.3040 - accuracy: 0.4754\n",
+ "Epoch 3/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.3032 - accuracy: 0.4768\n",
+ "Epoch 4/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.3013 - accuracy: 0.4731\n",
+ "Epoch 5/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2991 - accuracy: 0.4699\n",
+ "Epoch 6/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2946 - accuracy: 0.4662\n",
+ "Epoch 7/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2883 - accuracy: 0.4752\n",
+ "Epoch 8/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.2796 - accuracy: 0.4758\n",
+ "Epoch 9/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2667 - accuracy: 0.5029\n",
+ "Epoch 10/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2504 - accuracy: 0.5232\n",
+ "Epoch 11/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2326 - accuracy: 0.5462\n",
+ "Epoch 12/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.2129 - accuracy: 0.5699\n",
+ "Epoch 13/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.1929 - accuracy: 0.5965\n",
+ "Epoch 14/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.1759 - accuracy: 0.6054\n",
+ "Epoch 15/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.1574 - accuracy: 0.6187\n",
+ "Epoch 16/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.1428 - accuracy: 0.6254\n",
+ "Epoch 17/30\n",
+ "1130/1130 [==============================] - 12s 11ms/step - loss: 0.1300 - accuracy: 0.6332\n",
+ "Epoch 18/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.1169 - accuracy: 0.6386\n",
+ "Epoch 19/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.1048 - accuracy: 0.6455\n",
+ "Epoch 20/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.0944 - accuracy: 0.6296\n",
+ "Epoch 21/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0875 - accuracy: 0.6278\n",
+ "Epoch 22/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0791 - accuracy: 0.6250\n",
+ "Epoch 23/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0731 - accuracy: 0.6308\n",
+ "Epoch 24/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0661 - accuracy: 0.6212\n",
+ "Epoch 25/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.0620 - accuracy: 0.6167\n",
+ "Epoch 26/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0558 - accuracy: 0.6056\n",
+ "Epoch 27/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0535 - accuracy: 0.6057\n",
+ "Epoch 28/30\n",
+ "1130/1130 [==============================] - 12s 10ms/step - loss: 0.0494 - accuracy: 0.6007\n",
+ "Epoch 29/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.0443 - accuracy: 0.6046\n",
+ "Epoch 30/30\n",
+ "1130/1130 [==============================] - 11s 10ms/step - loss: 0.0430 - accuracy: 0.5938\n",
+ ">0.024\n",
+ "Epoch 1/30\n",
+ " 363/1130 [========>.....................] - ETA: 7s - loss: 0.3267 - accuracy: 0.4009"
+ ]
+ },
+ {
+ "output_type": "error",
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mevaluate_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;31m# summarize performance\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Accuracy: %.3f (%.3f)'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36mevaluate_model\u001b[1;34m(X, y, bs)\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mn_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_outputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[1;31m# fit model\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 33\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 34\u001b[0m \u001b[1;31m# make a prediction on the test set\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[0myhat\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_val\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[0;32m 1098\u001b[0m _r=1):\n\u001b[0;32m 1099\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1100\u001b[1;33m \u001b[0mtmp_logs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1101\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1102\u001b[0m \u001b[0mcontext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 827\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtrace\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTrace\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mtm\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 828\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 829\u001b[0m \u001b[0mcompiler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"xla\"\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_experimental_compile\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;34m\"nonXla\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 830\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 853\u001b[0m \u001b[1;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 854\u001b[0m \u001b[1;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 855\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: disable=not-callable\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 856\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 857\u001b[0m \u001b[1;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2940\u001b[0m (graph_function,\n\u001b[0;32m 2941\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[1;32m-> 2942\u001b[1;33m return graph_function._call_flat(\n\u001b[0m\u001b[0;32m 2943\u001b[0m filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0;32m 2944\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1916\u001b[0m and executing_eagerly):\n\u001b[0;32m 1917\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1918\u001b[1;33m return self._build_call_outputs(self._inference_function.call(\n\u001b[0m\u001b[0;32m 1919\u001b[0m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0;32m 1920\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 553\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0m_InterpolateFunctionError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 554\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcancellation_manager\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 555\u001b[1;33m outputs = execute.execute(\n\u001b[0m\u001b[0;32m 556\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msignature\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 557\u001b[0m \u001b[0mnum_outputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_num_outputs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32mC:\\ProgramData\\Miniconda3\\envs\\mlenv\\lib\\site-packages\\tensorflow\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 59\u001b[1;33m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[0;32m 60\u001b[0m inputs, attrs, num_outputs)\n\u001b[0;32m 61\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "results, model = evaluate_model(data_train, labels_train, bs=64)\n",
+ "# summarize performance\n",
+ "print('Accuracy: %.3f (%.3f)' % (np.mean(results), np.std(results)))"
+ ]
+ },
+ {
+ "source": [
+ "background = data_train[np.random.choice(data_train.shape[0], 100, replace=False)]\n",
+ "explainer = shap.DeepExplainer(model, background)"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yhat = model.predict(data_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def odds2prob(df):\n",
+ " df['odds_away']=1/df['odds_away']\n",
+ " df['odds_draw']=1/df['odds_draw']\n",
+ " df['odds_home']=1/df['odds_home']\n",
+ " df['margin']=df[['odds_away','odds_draw','odds_home']].sum(axis=1)\n",
+ " df['odds_away']=df['odds_away']/df['margin']\n",
+ " df['odds_draw']=df['odds_draw']/df['margin']\n",
+ " df['odds_home']=df['odds_home']/df['margin']\n",
+ " return df[['odds_away','odds_draw','odds_home']]\n",
+ "\n",
+ "\n",
+ "def softmax(df, columns):\n",
+ " df['margin']=df[columns].sum(axis=1)\n",
+ " for x in columns:\n",
+ " df[x]=df[x]/df['margin']\n",
+ " df=df.drop(columns=['margin'])\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([4.8954438e-02, 1.9956774e-01, 7.9421538e-01, 4.9211684e-01,\n",
+ " 3.7973702e-01, 1.0061966e-01, 1.4319265e-02, 9.1202662e-04,\n",
+ " 1.7471502e-04, 4.5027200e-06, 7.3556311e-02, 2.7749127e-01,\n",
+ " 3.0257180e-01, 1.8635319e-01, 8.8486604e-02, 3.7683818e-02,\n",
+ " 1.7281987e-02], dtype=float32)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 132
+ }
+ ],
+ "source": [
+ "yhat[2]"
+ ]
+ },
+ {
+ "source": [
+ "coly=np.hstack([['w1','wx','w2'],sch.columns,sca.columns,scgd.columns])\n",
+ "#coly=scgd.columns\n",
+ "colyp=[x+'_p' for x in coly]\n",
+ "df_y=pd.DataFrame(data=labels_test[:,3:], columns=coly[3:])\n",
+ "#df_y=pd.DataFrame(data=labels_test, columns=coly)\n",
+ "df_yhat=pd.DataFrame(data=yhat, columns=colyp)\n",
+ "info_test=info_test.rename(columns={'ft1':'sc1','ft2':'sc2'})\n",
+ "info_test=info_test.reset_index(drop=True)\n",
+ "df_preds=pd.concat([info_test,df_y,df_yhat], axis=1)\n",
+ "#df_preds=softmax(df_preds,['w1_p','wx_p','w2_p'])\n",
+ "#df_preds=softmax(df_preds,[x+'_p' for x in sch.columns])\n",
+ "#df_preds=softmax(df_preds,[x+'_p' for x in sca.columns])\n",
+ "#df_preds=softmax(df_preds,[x+'_p' for x in scgd.columns])\n",
+ "df_preds['w1_gd']=df_preds[['gd_6.0_p','gd_5.0_p','gd_4.0_p','gd_3.0_p','gd_2.0_p','gd_1.0_p']].sum(axis=1)\n",
+ "df_preds['wx_gd']=df_preds['gd_0.0_p']\n",
+ "df_preds['w2_gd']=df_preds[['gd_-6.0_p','gd_-5.0_p','gd_-4.0_p','gd_-3.0_p','gd_-2.0_p','gd_-1.0_p']].sum(axis=1)\n",
+ "\n",
+ "#df_preds"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 108,
+ "outputs": []
+ },
+ {
+ "source": [
+ "\n",
+ "conv=PredictionsConverter('op', df_preds[['w1_p','wx_p','w2_p']].values, df_preds[['w1','wx','w2']].values, info_test.copy(), odds=True)\n",
+ "conv.make_df()\n",
+ "conv.profit()\n"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 109,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "WAG:12027; ACC: 0.4330256921925667; PRF: -246.04000000000002; ROI: -0.020457304398436852\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "WAG:12706; ACC: 0.42184794585235325; PRF: -509.78; ROI: -0.04012120258145758\n"
+ ]
+ }
+ ],
+ "source": [
+ "#info_test=info_test.rename(columns={'ft1':'sc1','ft2':'sc2'})\n",
+ "conv1=PredictionsConverter('op', df_preds[['w1_gd','wx_gd','w2_gd']].values, df_preds[['w1','wx','w2']].values, info_test.copy(), odds=True)\n",
+ "conv1.make_df(threshold='max')\n",
+ "conv1.profit()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 4619 10751 5302 4982 0.599 0.374 0.481 \n1 DRAW 1721 14498 4609 4826 0.632 0.255 0.263 \n2 AWAY 4604 10881 5267 4902 0.604 0.371 0.484 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.670 0.466 0.683 0.575 0.473 0.5 \n1 0.759 0.272 0.750 0.511 0.267 0.5 \n2 0.674 0.466 0.689 0.579 0.475 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 4619 \n 10751 \n 5302 \n 4982 \n 0.599 \n 0.374 \n 0.481 \n 0.670 \n 0.466 \n 0.683 \n 0.575 \n 0.473 \n 0.5 \n \n \n 1 \n DRAW \n 1721 \n 14498 \n 4609 \n 4826 \n 0.632 \n 0.255 \n 0.263 \n 0.759 \n 0.272 \n 0.750 \n 0.511 \n 0.267 \n 0.5 \n \n \n 2 \n AWAY \n 4604 \n 10881 \n 5267 \n 4902 \n 0.604 \n 0.371 \n 0.484 \n 0.674 \n 0.466 \n 0.689 \n 0.579 \n 0.475 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "conv.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " w1 wx w2 w1_p wx_p w2_p w1_gd wx_gd w2_gd\n",
+ "0 0 0 1 0.153697 0.409941 0.436362 0.193199 0.416397 0.390404\n",
+ "1 0 0 1 0.389200 0.324374 0.286426 0.375709 0.338702 0.285590\n",
+ "2 1 0 0 0.255113 0.255719 0.489168 0.257906 0.263558 0.478536\n",
+ "3 1 0 0 0.367066 0.345683 0.287250 0.380905 0.317920 0.301175\n",
+ "4 1 0 0 0.551421 0.274133 0.174446 0.545328 0.263596 0.191075\n",
+ "... .. .. .. ... ... ... ... ... ...\n",
+ "27111 0 0 1 0.218319 0.248316 0.533365 0.226258 0.287219 0.486522\n",
+ "27112 1 0 0 0.292947 0.343484 0.363569 0.297586 0.360643 0.341771\n",
+ "27113 1 0 0 0.894143 0.081727 0.024130 0.913940 0.067083 0.018978\n",
+ "27114 0 0 1 0.158910 0.196134 0.644956 0.163151 0.217454 0.619394\n",
+ "27115 0 1 0 0.650707 0.267750 0.081543 0.660329 0.237130 0.102541\n",
+ "\n",
+ "[27116 rows x 9 columns]"
+ ],
+ "text/html": "\n\n
\n \n \n \n w1 \n wx \n w2 \n w1_p \n wx_p \n w2_p \n w1_gd \n wx_gd \n w2_gd \n \n \n \n \n 0 \n 0 \n 0 \n 1 \n 0.153697 \n 0.409941 \n 0.436362 \n 0.193199 \n 0.416397 \n 0.390404 \n \n \n 1 \n 0 \n 0 \n 1 \n 0.389200 \n 0.324374 \n 0.286426 \n 0.375709 \n 0.338702 \n 0.285590 \n \n \n 2 \n 1 \n 0 \n 0 \n 0.255113 \n 0.255719 \n 0.489168 \n 0.257906 \n 0.263558 \n 0.478536 \n \n \n 3 \n 1 \n 0 \n 0 \n 0.367066 \n 0.345683 \n 0.287250 \n 0.380905 \n 0.317920 \n 0.301175 \n \n \n 4 \n 1 \n 0 \n 0 \n 0.551421 \n 0.274133 \n 0.174446 \n 0.545328 \n 0.263596 \n 0.191075 \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 27111 \n 0 \n 0 \n 1 \n 0.218319 \n 0.248316 \n 0.533365 \n 0.226258 \n 0.287219 \n 0.486522 \n \n \n 27112 \n 1 \n 0 \n 0 \n 0.292947 \n 0.343484 \n 0.363569 \n 0.297586 \n 0.360643 \n 0.341771 \n \n \n 27113 \n 1 \n 0 \n 0 \n 0.894143 \n 0.081727 \n 0.024130 \n 0.913940 \n 0.067083 \n 0.018978 \n \n \n 27114 \n 0 \n 0 \n 1 \n 0.158910 \n 0.196134 \n 0.644956 \n 0.163151 \n 0.217454 \n 0.619394 \n \n \n 27115 \n 0 \n 1 \n 0 \n 0.650707 \n 0.267750 \n 0.081543 \n 0.660329 \n 0.237130 \n 0.102541 \n \n \n
\n
27116 rows × 9 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 160
+ }
+ ],
+ "source": [
+ "df_preds[['w1','wx','w2','w1_p','wx_p','w2_p','w1_gd','wx_gd','w2_gd']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "WAG:5734; ACC: 0.14318102546215555; PRF: -721.0699999999999; ROI: -0.1257534007673526\nWAG:8301; ACC: 0.5372846644982532; PRF: -215.48000000000002; ROI: -0.02595831827490664\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 385 1042 2445 1862 0.249 0.392 0.171 \n1 DRAW 1 4511 1 1221 0.787 0.213 0.001 \n2 AWAY 435 1002 2467 1830 0.251 0.395 0.192 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.299 0.136 0.359 0.235 0.152 0.5 \n1 1.000 0.500 0.787 0.500 0.002 0.5 \n2 0.289 0.150 0.354 0.240 0.168 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 385 \n 1042 \n 2445 \n 1862 \n 0.249 \n 0.392 \n 0.171 \n 0.299 \n 0.136 \n 0.359 \n 0.235 \n 0.152 \n 0.5 \n \n \n 1 \n DRAW \n 1 \n 4511 \n 1 \n 1221 \n 0.787 \n 0.213 \n 0.001 \n 1.000 \n 0.500 \n 0.787 \n 0.500 \n 0.002 \n 0.5 \n \n \n 2 \n AWAY \n 435 \n 1002 \n 2467 \n 1830 \n 0.251 \n 0.395 \n 0.192 \n 0.289 \n 0.150 \n 0.354 \n 0.240 \n 0.168 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 3874 7598 2999 2466 0.677 0.374 0.611 \n1 DRAW 374 12079 818 3666 0.735 0.239 0.093 \n2 AWAY 4656 6143 4237 1901 0.638 0.387 0.710 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.717 0.564 0.755 0.664 0.586 0.5 \n1 0.937 0.314 0.767 0.515 0.143 0.5 \n2 0.592 0.524 0.764 0.651 0.603 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 3874 \n 7598 \n 2999 \n 2466 \n 0.677 \n 0.374 \n 0.611 \n 0.717 \n 0.564 \n 0.755 \n 0.664 \n 0.586 \n 0.5 \n \n \n 1 \n DRAW \n 374 \n 12079 \n 818 \n 3666 \n 0.735 \n 0.239 \n 0.093 \n 0.937 \n 0.314 \n 0.767 \n 0.515 \n 0.143 \n 0.5 \n \n \n 2 \n AWAY \n 4656 \n 6143 \n 4237 \n 1901 \n 0.638 \n 0.387 \n 0.710 \n 0.592 \n 0.524 \n 0.764 \n 0.651 \n 0.603 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "from api.predictions_converter import PredictionsConverter\n",
+ "info_test=info_test.rename(columns={'ft1':'sc1','ft2':'sc2'})\n",
+ "conv_bookies=PredictionsConverter('op', api.util.odds2prob(info_test.copy()).values, labels_test, info_test.copy(), odds=True)\n",
+ "conv_bookies.make_df()\n",
+ "conv=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=True)\n",
+ "conv.make_df()\n",
+ "\n",
+ "conv_bookies.profit()\n",
+ "conv.profit()\n",
+ "conv_bookies.performance_metrics()\n",
+ "conv.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "WAG:12706; ACC: 0.21611836927435857; PRF: -1422.03; ROI: -0.1119179915000787\nWAG:12706; ACC: 0.48504643475523374; PRF: -314.58; ROI: -0.024758381866834565\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 1340 3023 4935 3408 0.343 0.374 0.282 \n1 DRAW 9 9467 25 3205 0.746 0.253 0.003 \n2 AWAY 1397 2945 5017 3347 0.342 0.373 0.294 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.380 0.214 0.470 0.331 0.243 0.5 \n1 0.997 0.265 0.747 0.500 0.006 0.5 \n2 0.370 0.218 0.468 0.332 0.250 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 1340 \n 3023 \n 4935 \n 3408 \n 0.343 \n 0.374 \n 0.282 \n 0.380 \n 0.214 \n 0.470 \n 0.331 \n 0.243 \n 0.5 \n \n \n 1 \n DRAW \n 9 \n 9467 \n 25 \n 3205 \n 0.746 \n 0.253 \n 0.003 \n 0.997 \n 0.265 \n 0.747 \n 0.500 \n 0.006 \n 0.5 \n \n \n 2 \n AWAY \n 1397 \n 2945 \n 5017 \n 3347 \n 0.342 \n 0.373 \n 0.294 \n 0.370 \n 0.218 \n 0.468 \n 0.332 \n 0.250 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 5525 11713 5265 4613 0.636 0.374 0.545 \n1 DRAW 873 18171 2029 6043 0.702 0.255 0.126 \n2 AWAY 6314 9944 7110 3748 0.600 0.371 0.628 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.690 0.512 0.717 0.617 0.528 0.5 \n1 0.900 0.301 0.750 0.513 0.178 0.5 \n2 0.583 0.470 0.726 0.605 0.538 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 5525 \n 11713 \n 5265 \n 4613 \n 0.636 \n 0.374 \n 0.545 \n 0.690 \n 0.512 \n 0.717 \n 0.617 \n 0.528 \n 0.5 \n \n \n 1 \n DRAW \n 873 \n 18171 \n 2029 \n 6043 \n 0.702 \n 0.255 \n 0.126 \n 0.900 \n 0.301 \n 0.750 \n 0.513 \n 0.178 \n 0.5 \n \n \n 2 \n AWAY \n 6314 \n 9944 \n 7110 \n 3748 \n 0.600 \n 0.371 \n 0.628 \n 0.583 \n 0.470 \n 0.726 \n 0.605 \n 0.538 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "conv_bookies1=PredictionsConverter('op', odds2prob(info_test.copy()).values, labels_test, info_test.copy(), odds=True)\n",
+ "conv_bookies1.make_df(threshold='max')\n",
+ "conv1=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=True)\n",
+ "conv1.make_df(threshold='max')\n",
+ "\n",
+ "conv_bookies1.profit()\n",
+ "conv1.profit()\n",
+ "conv_bookies1.performance_metrics()\n",
+ "conv1.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " ds country \\\n",
+ "0 2017-10-28 17:00:00+00:00 argentina \n",
+ "1 2018-08-11 14:00:00+00:00 england \n",
+ "2 2018-12-01 19:30:00+00:00 italy \n",
+ "3 2018-08-26 12:30:00+00:00 netherlands \n",
+ "4 2017-05-14 18:00:00+00:00 spain \n",
+ "... ... ... \n",
+ "13553 2018-08-14 18:45:00+00:00 england \n",
+ "13554 2018-04-07 14:00:00+00:00 ukraine \n",
+ "13555 2015-12-12 17:30:00+00:00 austria \n",
+ "13556 2016-09-21 15:30:00+00:00 finland \n",
+ "13557 2020-02-07 22:00:00+00:00 argentina \n",
+ "\n",
+ " liga t1 t2 \\\n",
+ "0 liga-profesional-de-futbol ca huracan lanus \n",
+ "1 championship aston villa wigan athletic \n",
+ "2 serie-a sampdoria bologna \n",
+ "3 eredivisie fc utrecht vvvvenlo \n",
+ "4 laliga athletic bilbao leganes \n",
+ "... ... ... ... \n",
+ "13553 efl-cup yeovil town aston villa \n",
+ "13554 premier-league-relegation-round oleksandria pfc feniks bucha \n",
+ "13555 bundesliga sv ried wolfsberger ac \n",
+ "13556 veikkausliiga ifk mariehamn inter turku \n",
+ "13557 liga-profesional-de-futbol aldosivi central cordoba \n",
+ "\n",
+ " sc1 sc2 odds_home odds_draw odds_away winner_home winner_draw \\\n",
+ "0 4.0 0.0 1.84 3.21 4.84 1 0 \n",
+ "1 3.0 2.0 0.00 0.00 0.00 1 0 \n",
+ "2 4.0 1.0 2.02 3.26 4.17 1 0 \n",
+ "3 1.0 1.0 0.00 0.00 0.00 0 1 \n",
+ "4 1.0 1.0 1.47 4.43 7.41 0 1 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "13553 0.0 1.0 0.00 0.00 0.00 0 0 \n",
+ "13554 2.0 0.0 1.82 3.25 4.58 1 0 \n",
+ "13555 1.0 0.0 0.00 0.00 0.00 1 0 \n",
+ "13556 1.0 1.0 1.90 3.37 4.13 0 1 \n",
+ "13557 0.0 2.0 2.49 2.85 3.25 0 0 \n",
+ "\n",
+ " winner_away pred_home pred_draw pred_away prob_home prob_draw \\\n",
+ "0 0 0 1 0 0.419015 0.785230 \n",
+ "1 0 1 0 0 0.916121 0.320084 \n",
+ "2 0 0 1 0 0.416682 0.878468 \n",
+ "3 0 1 0 0 0.908112 0.472075 \n",
+ "4 0 1 1 0 0.645087 0.798480 \n",
+ "... ... ... ... ... ... ... \n",
+ "13553 1 1 1 0 0.738303 0.974766 \n",
+ "13554 0 1 1 0 0.570895 0.614656 \n",
+ "13555 0 0 0 1 0.429426 0.001383 \n",
+ "13556 0 1 0 0 0.567031 0.176261 \n",
+ "13557 1 1 1 0 0.623006 0.602990 \n",
+ "\n",
+ " prob_away win prf \n",
+ "0 0.177421 0 -1.00 \n",
+ "1 0.033951 1 -1.00 \n",
+ "2 0.090711 0 -1.00 \n",
+ "3 0.014991 0 0.00 \n",
+ "4 0.056598 1 3.43 \n",
+ "... ... ... ... \n",
+ "13553 0.002282 0 0.00 \n",
+ "13554 0.238248 1 2.25 \n",
+ "13555 0.962793 0 0.00 \n",
+ "13556 0.496240 0 -1.00 \n",
+ "13557 0.117039 0 -1.00 \n",
+ "\n",
+ "[13300 rows x 21 columns]"
+ ],
+ "text/html": "\n\n
\n \n \n \n ds \n country \n liga \n t1 \n t2 \n sc1 \n sc2 \n odds_home \n odds_draw \n odds_away \n winner_home \n winner_draw \n winner_away \n pred_home \n pred_draw \n pred_away \n prob_home \n prob_draw \n prob_away \n win \n prf \n \n \n \n \n 0 \n 2017-10-28 17:00:00+00:00 \n argentina \n liga-profesional-de-futbol \n ca huracan \n lanus \n 4.0 \n 0.0 \n 1.84 \n 3.21 \n 4.84 \n 1 \n 0 \n 0 \n 0 \n 1 \n 0 \n 0.419015 \n 0.785230 \n 0.177421 \n 0 \n -1.00 \n \n \n 1 \n 2018-08-11 14:00:00+00:00 \n england \n championship \n aston villa \n wigan athletic \n 3.0 \n 2.0 \n 0.00 \n 0.00 \n 0.00 \n 1 \n 0 \n 0 \n 1 \n 0 \n 0 \n 0.916121 \n 0.320084 \n 0.033951 \n 1 \n -1.00 \n \n \n 2 \n 2018-12-01 19:30:00+00:00 \n italy \n serie-a \n sampdoria \n bologna \n 4.0 \n 1.0 \n 2.02 \n 3.26 \n 4.17 \n 1 \n 0 \n 0 \n 0 \n 1 \n 0 \n 0.416682 \n 0.878468 \n 0.090711 \n 0 \n -1.00 \n \n \n 3 \n 2018-08-26 12:30:00+00:00 \n netherlands \n eredivisie \n fc utrecht \n vvvvenlo \n 1.0 \n 1.0 \n 0.00 \n 0.00 \n 0.00 \n 0 \n 1 \n 0 \n 1 \n 0 \n 0 \n 0.908112 \n 0.472075 \n 0.014991 \n 0 \n 0.00 \n \n \n 4 \n 2017-05-14 18:00:00+00:00 \n spain \n laliga \n athletic bilbao \n leganes \n 1.0 \n 1.0 \n 1.47 \n 4.43 \n 7.41 \n 0 \n 1 \n 0 \n 1 \n 1 \n 0 \n 0.645087 \n 0.798480 \n 0.056598 \n 1 \n 3.43 \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... 
\n \n \n 13553 \n 2018-08-14 18:45:00+00:00 \n england \n efl-cup \n yeovil town \n aston villa \n 0.0 \n 1.0 \n 0.00 \n 0.00 \n 0.00 \n 0 \n 0 \n 1 \n 1 \n 1 \n 0 \n 0.738303 \n 0.974766 \n 0.002282 \n 0 \n 0.00 \n \n \n 13554 \n 2018-04-07 14:00:00+00:00 \n ukraine \n premier-league-relegation-round \n oleksandria \n pfc feniks bucha \n 2.0 \n 0.0 \n 1.82 \n 3.25 \n 4.58 \n 1 \n 0 \n 0 \n 1 \n 1 \n 0 \n 0.570895 \n 0.614656 \n 0.238248 \n 1 \n 2.25 \n \n \n 13555 \n 2015-12-12 17:30:00+00:00 \n austria \n bundesliga \n sv ried \n wolfsberger ac \n 1.0 \n 0.0 \n 0.00 \n 0.00 \n 0.00 \n 1 \n 0 \n 0 \n 0 \n 0 \n 1 \n 0.429426 \n 0.001383 \n 0.962793 \n 0 \n 0.00 \n \n \n 13556 \n 2016-09-21 15:30:00+00:00 \n finland \n veikkausliiga \n ifk mariehamn \n inter turku \n 1.0 \n 1.0 \n 1.90 \n 3.37 \n 4.13 \n 0 \n 1 \n 0 \n 1 \n 0 \n 0 \n 0.567031 \n 0.176261 \n 0.496240 \n 0 \n -1.00 \n \n \n 13557 \n 2020-02-07 22:00:00+00:00 \n argentina \n liga-profesional-de-futbol \n aldosivi \n central cordoba \n 0.0 \n 2.0 \n 2.49 \n 2.85 \n 3.25 \n 0 \n 0 \n 1 \n 1 \n 1 \n 0 \n 0.623006 \n 0.602990 \n 0.117039 \n 0 \n -1.00 \n \n \n
\n
13300 rows × 21 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 87
+ }
+ ],
+ "source": [
+ "conv.DF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0.5372846644982532"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 126
+ }
+ ],
+ "source": [
+ "conv.DF.loc[conv.DF['odds_home']>0].win.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res=conv.DF.loc[conv.DF['odds_home']>0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(-110.32000000000002, 26.24999999999999, -121.71000000000002)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 125
+ }
+ ],
+ "source": [
+ "res[res['pred_home']==1].prf.sum(),res[res['pred_draw']==1].prf.sum(),res[res['pred_away']==1].prf.sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res.to_csv('data/pred.csv', index=False)"
+ ]
+ },
+ {
+ "source": [
+ "# sdef\n",
+ "$ \\frac{1}{2} $"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/prerequisites/op/le_country b/prerequisites/op/le_country
new file mode 100644
index 0000000..db1ae40
Binary files /dev/null and b/prerequisites/op/le_country differ
diff --git a/prerequisites/op/le_t1_t2 b/prerequisites/op/le_t1_t2
new file mode 100644
index 0000000..0c641dd
Binary files /dev/null and b/prerequisites/op/le_t1_t2 differ
diff --git a/prerequisites/op/ohe_country_id b/prerequisites/op/ohe_country_id
new file mode 100644
index 0000000..f87a75e
Binary files /dev/null and b/prerequisites/op/ohe_country_id differ
diff --git a/prerequisites/op/ohe_winner b/prerequisites/op/ohe_winner
new file mode 100644
index 0000000..2393feb
Binary files /dev/null and b/prerequisites/op/ohe_winner differ
diff --git a/prerequisites/op/sc_bn b/prerequisites/op/sc_bn
new file mode 100644
index 0000000..77a606b
Binary files /dev/null and b/prerequisites/op/sc_bn differ
diff --git a/prerequisites/sofa/le_country b/prerequisites/sofa/le_country
new file mode 100644
index 0000000..c076366
Binary files /dev/null and b/prerequisites/sofa/le_country differ
diff --git a/prerequisites/sofa/le_formation b/prerequisites/sofa/le_formation
new file mode 100644
index 0000000..17629d5
Binary files /dev/null and b/prerequisites/sofa/le_formation differ
diff --git a/prerequisites/sofa/le_formation_h_formation_a b/prerequisites/sofa/le_formation_h_formation_a
new file mode 100644
index 0000000..17629d5
Binary files /dev/null and b/prerequisites/sofa/le_formation_h_formation_a differ
diff --git a/prerequisites/sofa/le_homeTeam_awayTeam b/prerequisites/sofa/le_homeTeam_awayTeam
new file mode 100644
index 0000000..ea0e451
Binary files /dev/null and b/prerequisites/sofa/le_homeTeam_awayTeam differ
diff --git a/prerequisites/sofa/le_t1_t2 b/prerequisites/sofa/le_t1_t2
new file mode 100644
index 0000000..2ee137f
Binary files /dev/null and b/prerequisites/sofa/le_t1_t2 differ
diff --git a/prerequisites/sofa/le_team b/prerequisites/sofa/le_team
new file mode 100644
index 0000000..ea0e451
Binary files /dev/null and b/prerequisites/sofa/le_team differ
diff --git a/prerequisites/sofa/ohe_away_formation b/prerequisites/sofa/ohe_away_formation
new file mode 100644
index 0000000..399a110
Binary files /dev/null and b/prerequisites/sofa/ohe_away_formation differ
diff --git a/prerequisites/sofa/ohe_country_id b/prerequisites/sofa/ohe_country_id
new file mode 100644
index 0000000..5b326dd
Binary files /dev/null and b/prerequisites/sofa/ohe_country_id differ
diff --git a/prerequisites/sofa/ohe_home_formation b/prerequisites/sofa/ohe_home_formation
new file mode 100644
index 0000000..399a110
Binary files /dev/null and b/prerequisites/sofa/ohe_home_formation differ
diff --git a/prerequisites/sofa/ohe_pop_r b/prerequisites/sofa/ohe_pop_r
new file mode 100644
index 0000000..3868e7e
Binary files /dev/null and b/prerequisites/sofa/ohe_pop_r differ
diff --git a/prerequisites/sofa/ohe_round b/prerequisites/sofa/ohe_round
new file mode 100644
index 0000000..2026021
Binary files /dev/null and b/prerequisites/sofa/ohe_round differ
diff --git a/prerequisites/sofa/ohe_winner b/prerequisites/sofa/ohe_winner
new file mode 100644
index 0000000..2393feb
Binary files /dev/null and b/prerequisites/sofa/ohe_winner differ
diff --git a/prerequisites/sofa/r_votes b/prerequisites/sofa/r_votes
new file mode 100644
index 0000000..6905ca2
Binary files /dev/null and b/prerequisites/sofa/r_votes differ
diff --git a/prerequisites/sofa/sc_graph b/prerequisites/sofa/sc_graph
new file mode 100644
index 0000000..e588d09
Binary files /dev/null and b/prerequisites/sofa/sc_graph differ
diff --git a/prerequisites/sofa/sc_graph1_graph2 b/prerequisites/sofa/sc_graph1_graph2
new file mode 100644
index 0000000..4f61601
Binary files /dev/null and b/prerequisites/sofa/sc_graph1_graph2 differ
diff --git a/sofa.ipynb b/sofa.ipynb
new file mode 100644
index 0000000..7cc92f7
--- /dev/null
+++ b/sofa.ipynb
@@ -0,0 +1,442 @@
+{
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5-final"
+ },
+ "orig_nbformat": 2,
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.8.5 64-bit ('mlenv': conda)",
+ "metadata": {
+ "interpreter": {
+ "hash": "12f2fd9a8da6c9ddda222d67ff20ee53b82617d5a9ac88eb47f60b586ce1b05e"
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import pickle\n",
+ "\n",
+ "from sklearn.preprocessing import LabelEncoder,OneHotEncoder\n",
+ "from sklearn.model_selection import RepeatedKFold,train_test_split\n",
+ "import tensorflow as tf\n",
+ "from tensorflow import keras\n",
+ "from tensorflow.keras.layers import Dense, BatchNormalization,Dropout\n",
+ "from tensorflow.keras.models import Sequential\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "import api.util\n",
+ "from api.predictions_converter import PredictionsConverter\n",
+ "from api.sofa_dp import SofaDataProvider\n",
+ "\n",
+ "from IPython.display import display\n",
+ "pd.options.display.max_columns = None\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dp=SofaDataProvider(load=False)\n",
+ "data, labels, info, df=dp.provide_data()\n",
+ "#df=dp._load_data()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['awayScoreHT', 'country', 'country_id', 'ds', 'homeScoreHT', 'liga',\n",
+ " 'mid', 'round', 'sc1', 'sc2', 't1', 't2', 'tid1', 'tid2', 'winner',\n",
+ " 'formation_h', 'formation_a', 'home_formation', 'away_formation',\n",
+ " 'vote_home', 'vote_draw', 'vote_away', 'pop_r'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([0.49217639, 0.23613087, 0.27169275, 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 1. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 1. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 1. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 0. , 1. ,\n",
+ " 0. , 0. , 0. , 0. , 0. ,\n",
+ " 0. , 0. , 0. , 1. , 0. ,\n",
+ " 0. ])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ],
+ "source": [
+ "data[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([1., 0., 0.])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ],
+ "source": [
+ "labels[0]"
+ ]
+ },
+ {
+ "source": [
+ "# Analysis"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
+ "data_train, data_test, labels_train, labels_test, info_train, info_test = train_test_split(data, labels, info, test_size=0.2, random_state=42)\n",
+ "print(data_train.shape, data_test.shape)"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(69716, 156) (17429, 156)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "source": [
+ "df.isnull().any()"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_model(n_inputs, n_outputs):\n",
+ " model = Sequential()\n",
+ " model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dropout(0.2))\n",
+ " model.add(Dense(512, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " #model.add(Dropout(0.2))\n",
+ " model.add(Dense(64, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(16, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))\n",
+ " model.add(Dense(n_outputs, activation='sigmoid'))\n",
+ " model.compile(loss='binary_crossentropy', optimizer='adam')\n",
+ " return model\n",
+ "\n",
+ "def evaluate_model(X, y):\n",
+ " results = list()\n",
+ " n_inputs, n_outputs = X.shape[1], y.shape[1]\n",
+ " # define evaluation procedure\n",
+ " cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)\n",
+ " # enumerate folds\n",
+ " for train_ix, test_ix in cv.split(X):\n",
+ " # prepare data\n",
+ " X_train, X_test = X[train_ix], X[test_ix]\n",
+ " y_train, y_test = y[train_ix], y[test_ix]\n",
+ " # define model\n",
+ " model = get_model(n_inputs, n_outputs)\n",
+ " # fit model\n",
+ " model.fit(X_train, y_train, epochs=10)\n",
+ " # make a prediction on the test set\n",
+ " yhat = model.predict(X_test)\n",
+ " # round probabilities to class labels\n",
+ " yhat = yhat.round()\n",
+ " # calculate accuracy\n",
+ " acc = accuracy_score(y_test, yhat)\n",
+ " # store result\n",
+ " print('>%.3f' % acc)\n",
+ " results.append(acc)\n",
+ " break\n",
+ " return results, model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1/10\n",
+ "1743/1743 [==============================] - 19s 10ms/step - loss: 0.6016\n",
+ "Epoch 2/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.5796\n",
+ "Epoch 3/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.5621\n",
+ "Epoch 4/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.5315\n",
+ "Epoch 5/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.4865\n",
+ "Epoch 6/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.4308\n",
+ "Epoch 7/10\n",
+ "1743/1743 [==============================] - 17s 10ms/step - loss: 0.3800\n",
+ "Epoch 8/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.3285\n",
+ "Epoch 9/10\n",
+ "1743/1743 [==============================] - 17s 10ms/step - loss: 0.2902\n",
+ "Epoch 10/10\n",
+ "1743/1743 [==============================] - 18s 10ms/step - loss: 0.2617\n",
+ ">0.500\n",
+ "Accuracy: 0.500 (0.000)\n"
+ ]
+ }
+ ],
+ "source": [
+ "results, model = evaluate_model(data_train, labels_train)\n",
+ "# summarize performance\n",
+ "print('Accuracy: %.3f (%.3f)' % (np.mean(results), np.std(results)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yhat = model.predict(data_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def odds2prob(df):\n",
+ " df['odds_away']=1/df['odds_away']\n",
+ " df['odds_draw']=1/df['odds_draw']\n",
+ " df['odds_home']=1/df['odds_home']\n",
+ " df['margin']=df[['odds_away','odds_draw','odds_home']].sum(axis=1)\n",
+ " df['odds_away']=df['odds_away']/df['margin']\n",
+ " df['odds_draw']=df['odds_draw']/df['margin']\n",
+ " df['odds_home']=df['odds_home']/df['margin']\n",
+ " return df[['odds_away','odds_draw','odds_home']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " mid ts country \\\n",
+ "10600 6897580 2015-11-07 14:00:00+00:00 italy \n",
+ "47569 7895571 2018-11-10 15:15:00+00:00 spain \n",
+ "17246 6834033 2016-10-07 00:45:00+00:00 south-america \n",
+ "71236 8747958 2020-08-23 19:00:00+00:00 brazil \n",
+ "64437 8246175 2020-02-15 19:00:00+00:00 france \n",
+ "... ... ... ... \n",
+ "64995 8247260 2020-02-28 19:00:00+00:00 france \n",
+ "40229 7471896 2017-09-30 17:30:00+00:00 romania \n",
+ "80026 8747900 2020-09-05 22:00:00+00:00 brazil \n",
+ "80557 9030877 2020-09-27 23:30:00+00:00 paraguay \n",
+ "16955 7163394 2016-09-25 11:30:00+00:00 russia \n",
+ "\n",
+ " tournament home_tid away_tid \\\n",
+ "10600 serie-b 1212 419 \n",
+ "47569 laliga 925 2106 \n",
+ "17246 world-cup-qualification-conmebol 324 302 \n",
+ "71236 brasileiro-serie-a 2119 961 \n",
+ "64437 ligue-1 2017 1490 \n",
+ "... ... ... ... \n",
+ "64995 ligue-2 1883 1694 \n",
+ "40229 liga-i 582 169 \n",
+ "80026 brasileiro-serie-a 493 316 \n",
+ "80557 primera-division-apertura 912 966 \n",
+ "16955 premier-league 158 70 \n",
+ "\n",
+ " homeTeamShort homeScoreHT homeScoreFT awayTeamShort \\\n",
+ "10600 Latina Calcio 1932 0.0 1.0 Cesena \n",
+ "47569 Getafe 0.0 0.0 Valencia \n",
+ "17246 Brazil 4.0 5.0 Bolivia \n",
+ "71236 Vasco 0.0 0.0 Grêmio \n",
+ "64437 Toulouse 0.0 0.0 Nice \n",
+ "... ... ... ... ... \n",
+ "64995 Sochaux 0.0 1.0 Rodez \n",
+ "40229 Dinamo B. 0.0 1.0 Astra \n",
+ "80026 Corinthians 1.0 2.0 Botafogo \n",
+ "80557 General Díaz 0.0 0.0 Guaraní \n",
+ "16955 Arsenal 0.0 0.0 Akhmat \n",
+ "\n",
+ " awayScoreHT awayScoreFT winner \n",
+ "10600 0.0 0.0 home \n",
+ "47569 0.0 1.0 away \n",
+ "17246 0.0 0.0 home \n",
+ "71236 0.0 0.0 draw \n",
+ "64437 1.0 2.0 away \n",
+ "... ... ... ... \n",
+ "64995 0.0 1.0 draw \n",
+ "40229 0.0 1.0 draw \n",
+ "80026 1.0 2.0 draw \n",
+ "80557 0.0 1.0 away \n",
+ "16955 0.0 0.0 draw \n",
+ "\n",
+ "[17879 rows x 13 columns]"
+ ],
+ "text/html": "\n\n
\n \n \n \n mid \n ts \n country \n tournament \n home_tid \n away_tid \n homeTeamShort \n homeScoreHT \n homeScoreFT \n awayTeamShort \n awayScoreHT \n awayScoreFT \n winner \n \n \n \n \n 10600 \n 6897580 \n 2015-11-07 14:00:00+00:00 \n italy \n serie-b \n 1212 \n 419 \n Latina Calcio 1932 \n 0.0 \n 1.0 \n Cesena \n 0.0 \n 0.0 \n home \n \n \n 47569 \n 7895571 \n 2018-11-10 15:15:00+00:00 \n spain \n laliga \n 925 \n 2106 \n Getafe \n 0.0 \n 0.0 \n Valencia \n 0.0 \n 1.0 \n away \n \n \n 17246 \n 6834033 \n 2016-10-07 00:45:00+00:00 \n south-america \n world-cup-qualification-conmebol \n 324 \n 302 \n Brazil \n 4.0 \n 5.0 \n Bolivia \n 0.0 \n 0.0 \n home \n \n \n 71236 \n 8747958 \n 2020-08-23 19:00:00+00:00 \n brazil \n brasileiro-serie-a \n 2119 \n 961 \n Vasco \n 0.0 \n 0.0 \n Grêmio \n 0.0 \n 0.0 \n draw \n \n \n 64437 \n 8246175 \n 2020-02-15 19:00:00+00:00 \n france \n ligue-1 \n 2017 \n 1490 \n Toulouse \n 0.0 \n 0.0 \n Nice \n 1.0 \n 2.0 \n away \n \n \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n ... \n \n \n 64995 \n 8247260 \n 2020-02-28 19:00:00+00:00 \n france \n ligue-2 \n 1883 \n 1694 \n Sochaux \n 0.0 \n 1.0 \n Rodez \n 0.0 \n 1.0 \n draw \n \n \n 40229 \n 7471896 \n 2017-09-30 17:30:00+00:00 \n romania \n liga-i \n 582 \n 169 \n Dinamo B. \n 0.0 \n 1.0 \n Astra \n 0.0 \n 1.0 \n draw \n \n \n 80026 \n 8747900 \n 2020-09-05 22:00:00+00:00 \n brazil \n brasileiro-serie-a \n 493 \n 316 \n Corinthians \n 1.0 \n 2.0 \n Botafogo \n 1.0 \n 2.0 \n draw \n \n \n 80557 \n 9030877 \n 2020-09-27 23:30:00+00:00 \n paraguay \n primera-division-apertura \n 912 \n 966 \n General Díaz \n 0.0 \n 0.0 \n Guaraní \n 0.0 \n 1.0 \n away \n \n \n 16955 \n 7163394 \n 2016-09-25 11:30:00+00:00 \n russia \n premier-league \n 158 \n 70 \n Arsenal \n 0.0 \n 0.0 \n Akhmat \n 0.0 \n 0.0 \n draw \n \n \n
\n
17879 rows × 13 columns
\n
"
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ],
+ "source": [
+ "info_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 4686 4837 2930 1898 0.664 0.459 0.712 \n1 DRAW 1446 8871 1944 2090 0.719 0.246 0.409 \n2 AWAY 1930 8674 1446 2301 0.739 0.295 0.456 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.623 0.615 0.718 0.667 0.660 0.5 \n1 0.820 0.427 0.809 0.615 0.418 0.5 \n2 0.857 0.572 0.790 0.657 0.507 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 4686 \n 4837 \n 2930 \n 1898 \n 0.664 \n 0.459 \n 0.712 \n 0.623 \n 0.615 \n 0.718 \n 0.667 \n 0.660 \n 0.5 \n \n \n 1 \n DRAW \n 1446 \n 8871 \n 1944 \n 2090 \n 0.719 \n 0.246 \n 0.409 \n 0.820 \n 0.427 \n 0.809 \n 0.615 \n 0.418 \n 0.5 \n \n \n 2 \n AWAY \n 1930 \n 8674 \n 1446 \n 2301 \n 0.739 \n 0.295 \n 0.456 \n 0.857 \n 0.572 \n 0.790 \n 0.657 \n 0.507 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "from api.predictions_converter import PredictionsConverter\n",
+ "#conv_bookies=PredictionsConverter('op', api.util.odds2prob(info_test.copy()).values, labels_test, info_test.copy(), odds=False)\n",
+ "#conv_bookies.make_df()\n",
+ "conv=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=False)\n",
+ "conv.make_df()\n",
+ "\n",
+ "#conv_bookies.profit()\n",
+ "#conv.profit()\n",
+ "#conv_bookies.performance_metrics()\n",
+ "conv.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": " Name TP TN FP FN Accuracy Prevalence Sensitivity \\\n0 HOME 5201 5544 3525 2373 0.646 0.455 0.687 \n1 DRAW 1620 10126 2353 2544 0.706 0.250 0.389 \n2 AWAY 2137 9931 1807 2768 0.725 0.295 0.436 \n\n Specificity PPV NPV AUC F1 Threshold \n0 0.611 0.596 0.700 0.649 0.638 0.5 \n1 0.811 0.408 0.799 0.600 0.398 0.5 \n2 0.846 0.542 0.782 0.641 0.483 0.5 ",
+ "text/html": "\n\n
\n \n \n \n Name \n TP \n TN \n FP \n FN \n Accuracy \n Prevalence \n Sensitivity \n Specificity \n PPV \n NPV \n AUC \n F1 \n Threshold \n \n \n \n \n 0 \n HOME \n 5201 \n 5544 \n 3525 \n 2373 \n 0.646 \n 0.455 \n 0.687 \n 0.611 \n 0.596 \n 0.700 \n 0.649 \n 0.638 \n 0.5 \n \n \n 1 \n DRAW \n 1620 \n 10126 \n 2353 \n 2544 \n 0.706 \n 0.250 \n 0.389 \n 0.811 \n 0.408 \n 0.799 \n 0.600 \n 0.398 \n 0.5 \n \n \n 2 \n AWAY \n 2137 \n 9931 \n 1807 \n 2768 \n 0.725 \n 0.295 \n 0.436 \n 0.846 \n 0.542 \n 0.782 \n 0.641 \n 0.483 \n 0.5 \n \n \n
\n
"
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "#conv_bookies1=PredictionsConverter('op', odds2prob(info_test.copy()).values, labels_test, info_test.copy())\n",
+ "#conv_bookies1.make_df(threshold='max')\n",
+ "conv1=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=False)\n",
+ "conv1.make_df(threshold='max')\n",
+ "\n",
+ "#conv_bookies1.profit()\n",
+ "#conv1.profit()\n",
+ "#conv_bookies1.performance_metrics()\n",
+ "conv1.performance_metrics()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ]
+}
\ No newline at end of file