-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpopularity.py
114 lines (89 loc) · 3.31 KB
/
popularity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import populartimes

from apikeys import API_KEY
# Local hours (0-23) during which a place of a given query type is sampled.
# Venues that stay open past midnight list the evening hours first and then
# the early-morning hours of the following day.
HOURS_OF_INTEREST = {
    'restaurant': range(9, 23),
    'bar': [*range(12, 24), *range(4)],
    'club': [*range(20, 24), *range(6)],
    'train station': range(5, 21),
    'tourist information': range(8, 21),
    # everything below shares the same 6..20 daytime window
    **dict.fromkeys(
        (
            'sights',
            'park',
            'mall',
            'supermarket',
            'street market',
            'hardware store',
        ),
        range(6, 21),
    ),
}

# Root directory below which one sub-directory per UTC date is created.
DATADIR = 'data/popularity'

# Only sample a place when its local hour is a multiple of this value;
# a falsy value (e.g. None) disables the filter.
EVERY_NTH_HOUR = 3

# Only sample a place on these local weekdays, numbered as in
# datetime.weekday() (Monday = 0), i.e. Tue, Thu, Sat; a falsy value
# disables the filter.
ON_WEEKDAYS = (1, 3, 5)
#%%
# Load the places of interest and take ONE UTC timestamp for the whole run, so
# that every row written by this run carries the same UTC date/weekday/hour.
pois = pd.read_csv('data/places_of_interest_tz.csv')   #.sample(20)

# Naive UTC "now". datetime.utcnow() is deprecated since Python 3.12; taking an
# aware timestamp and dropping tzinfo yields the identical naive value, which
# keeps the later naive-datetime arithmetic unchanged.
utcnow = datetime.now(timezone.utc).replace(tzinfo=None)
utcdate_ymd = utcnow.strftime('%Y-%m-%d')
utcweekday = utcnow.weekday()
utchour = utcnow.hour

# Output goes to <DATADIR>/<UTC date>/.
datadir_today = os.path.join(DATADIR, utcdate_ymd)
if not os.path.exists(datadir_today):
    print('creating directory', datadir_today, '\n')
    # makedirs (instead of mkdir) also creates a missing DATADIR parent, and
    # exist_ok tolerates another run creating the directory between the
    # existence check above and this call.
    os.makedirs(datadir_today, mode=0o755, exist_ok=True)
#%%
# Walk over all places of interest and, for each one whose LOCAL weekday and
# hour pass the sampling filters, query its popularity via populartimes.
# Each successful query appends one row to `resultrows`:
#   [place_id, UTC date, UTC weekday, UTC hour,
#    local date, local weekday, local hour, current popularity, usual popularity]
resultrows = []
n_queries = 0
n_pois = len(pois)

# enumerate() yields a true running position for the progress message; the
# DataFrame index label is not guaranteed to be 0..n-1 (e.g. after sampling).
for poi_num, (_, poirow) in enumerate(pois.iterrows(), start=1):
    print('place of interest %d/%d: %s, %s' % (poi_num, n_pois, poirow.city, poirow.country))

    # Shift the run's UTC timestamp by the place's raw + DST offsets (both in
    # seconds) to obtain the local wall-clock time.
    poi_tzoffset = timedelta(seconds=poirow.tz_rawoffset + poirow.tz_dstoffset)
    poi_localtime = utcnow + poi_tzoffset
    poi_localwd = poi_localtime.weekday()
    poi_localhour = poi_localtime.hour

    if ON_WEEKDAYS and poi_localwd not in ON_WEEKDAYS:
        print('skipping (local weekday is %d and will only run on weekdays %s)' % (poi_localwd, str(ON_WEEKDAYS)))
        continue

    if EVERY_NTH_HOUR and poi_localhour % EVERY_NTH_HOUR != 0:
        print('skipping (local hour %d and will only run every %d hour)' % (poi_localhour, EVERY_NTH_HOUR))
        continue

    # Query types without an entry fall back to the daytime hours 6..20.
    # Membership test and iteration work on both ranges and lists, so no
    # conversion to list is needed.
    poi_hinterest = HOURS_OF_INTEREST.get(poirow['query'], range(6, 21))
    if poi_localhour not in poi_hinterest:
        print('> skipping (local hour %d not in hours of interest %s)'
              % (poi_localhour, ', '.join(map(str, poi_hinterest))))
        continue

    n_queries += 1
    try:
        poptimes = populartimes.get_id(API_KEY, poirow.place_id)
    except Exception as exc:
        # Best effort: a single failing place must not abort the whole run,
        # but the reason is reported instead of being silently discarded.
        print('> query raised %s: %s' % (type(exc).__name__, exc))
        poptimes = {}

    try:
        current_pop = poptimes['current_popularity']
        # assumes the 'populartimes' list is ordered Monday-first like
        # datetime.weekday() and holds 24 hourly values per day — TODO confirm
        usual_pop = poptimes['populartimes'][poi_localwd]['data'][poi_localhour]
    except (KeyError, IndexError, TypeError):
        # Missing live data or an unexpected response shape: skip this place
        # rather than crash and lose the rows collected so far.
        print('> failed to fetch popularity data')
    else:
        print('> got popularity data')
        resultrows.append([
            poirow.place_id,
            utcdate_ymd,
            utcweekday,
            utchour,
            poi_localtime.strftime('%Y-%m-%d'),
            poi_localwd,
            poi_localhour,
            current_pop,
            usual_pop,
        ])

# NOTE(review): the flattened source does not show whether this separator was
# printed once after the loop or once per queried place — confirm.
print('\n')
#%%
# Summarise the run and, when at least one row was collected, write the rows to
# <datadir_today>/<UTC date>_h<two-digit UTC hour>.csv.
print('made %d queries and got %d results' % (n_queries, len(resultrows)))

if not resultrows:
    print('nothing to save')
else:
    # Column order must match the order in which the row lists were filled.
    result_columns = [
        'place_id',
        'utc_date', 'utc_weekday', 'utc_hour',
        'local_date', 'local_weekday', 'local_hour',
        'current_pop', 'usual_pop'
    ]
    popdata = pd.DataFrame(resultrows, columns=result_columns)

    hour_tag = str(utchour).zfill(2)
    outfile = os.path.join(datadir_today, '%s_h%s.csv' % (utcdate_ymd, hour_tag))
    print('saving data to file', outfile)
    popdata.to_csv(outfile, index=False)

print('done.')