-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_dataset_dump.py
116 lines (90 loc) · 2.93 KB
/
_dataset_dump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
import json
import shelve
import re
from operator import itemgetter
from collections import defaultdict
import requests
# Placeholder value: downloadData sends this string as the 'AccountKey'
# request header, so replace it with a real key before downloading.
LTA_ACCOUNT_KEY = 'YOUR LTA ACCOUNT KEY'
def saveAsJson(thing, filename):
    """Write *thing* as JSON to ``<filename>.json``.

    The file is encoded as UTF-8. Object keys are sorted, nesting is
    indented by four spaces, and non-ASCII characters are written as-is
    instead of being escaped.
    """
    json_path = filename + ".json"
    with open(json_path, mode="w", encoding="utf-8") as out:
        json.dump(thing, out, ensure_ascii=False, indent=4, sort_keys=True)
def loadFromJson(filename):
    """Read ``<filename>.json`` and return the decoded JSON value.

    The file is opened as UTF-8 so that it round-trips with saveAsJson,
    which writes UTF-8 with non-ASCII characters left unescaped. Relying
    on the platform default encoding would mis-decode such files on
    systems whose default is not UTF-8.
    """
    with open(filename + ".json", "r", encoding="utf-8") as thefile:
        return json.load(thefile)
def saveDictAsShelve(dicty, filename):
    """Store every item of *dicty* in the shelve database ``<filename>.shelve``.

    Each key is converted with ``str()`` before it is stored, so non-string
    keys end up under their string form. Entries already present in the
    shelf under other keys are left in place.

    The shelf is opened as a context manager so it is closed (and flushed)
    even if storing one of the values raises.
    """
    with shelve.open(filename + '.shelve') as shelf:
        # Assign item by item rather than shelf.update(dicty): the keys
        # have to be stringified first.
        for key, value in dicty.items():
            shelf[str(key)] = value
def loadShelveAsDict(filename):
    """Return the contents of the shelve database ``<filename>.shelve`` as a dict.

    All key/value pairs are copied into a plain dict. The shelf is opened
    as a context manager so the underlying database is closed before
    returning instead of being left open.
    """
    with shelve.open(filename + '.shelve') as shelf:
        return dict(shelf.items())
def downloadData(name):
    """Download every record of a DataMall dataset into one dict.

    The resource ``name`` is requested repeatedly with an increasing
    ``$skip`` query parameter until a page with no records comes back.

    Args:
        name: Dataset to fetch; ``"BusRoutes"`` or ``"BusStops"``.

    Returns:
        A dict mapping a per-record key to the record dict. BusRoutes
        records are keyed ``"<BusStopCode>_<ServiceNo>_<Direction>"`` and
        BusStops records are keyed by ``BusStopCode``.

    Raises:
        ValueError: If ``name`` is not a supported dataset.
        requests.HTTPError: If the service answers with an error status.
    """
    # One key builder per supported dataset. Checking up front avoids
    # issuing a request and then failing on an unbound key variable.
    keyBuilders = {
        "BusRoutes": lambda rec: "{}_{}_{}".format(rec['BusStopCode'], rec['ServiceNo'], rec['Direction']),
        "BusStops": lambda rec: "{}".format(rec['BusStopCode']),
    }
    if name not in keyBuilders:
        raise ValueError("unsupported dataset name: {!r}".format(name))
    makeKey = keyBuilders[name]

    # Authentication parameters
    headers = {
        'AccountKey': LTA_ACCOUNT_KEY,
        'accept': 'application/json',  # Request results in JSON
    }
    # Resource URL
    target = 'http://datamall2.mytransport.sg/ltaodataservice/' + name

    jsonDict = {}
    skippy = 0
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(target, params={"$skip": skippy}, headers=headers, timeout=60)
        # Fail with a clear HTTP error instead of a confusing parse/KeyError.
        r.raise_for_status()
        dictList = r.json()["value"]

        for dicty in dictList:
            jsonDict[makeKey(dicty)] = dicty

        # Advance by the number of records actually received so that no
        # record is skipped or fetched twice, whatever the page size is.
        skippy += len(dictList)
        print(len(dictList), skippy)  # progress: page size, records so far
        if not dictList:
            break
    return jsonDict
def funnysort(listy):
    """Return the strings of *listy* sorted numerically, then alphabetically.

    Each string is ranked by the integer it parses to once any run of
    non-digit characters that trails a digit has been removed, so "12A"
    ranks as 12. Strings that still do not parse as an integer rank as
    99999. Ties on the number are broken by plain string comparison.
    """
    def numericRank(label):
        # Drop a trailing non-digit suffix only when it follows a digit.
        trimmed = re.sub(r"(?<=\d)\D+$", "", label)
        try:
            return int(trimmed)
        except ValueError:
            return 99999

    return sorted(listy, key=lambda label: (numericRank(label), label))
def combineDicts(routes, stops):
    """Attach to each stop the sorted list of services that call at it.

    Every route record contributes its "ServiceNo" to the stop named by
    its "BusStopCode". Duplicates are removed and the result is ordered
    with funnysort. Each value of *stops* is then given a "Services"
    entry, looked up by the stop's key in *stops*; stops without any
    route get an empty list. *stops* is modified in place and returned.
    """
    servicesByStop = {}
    for route in routes.values():
        servicesByStop.setdefault(route["BusStopCode"], set()).add(route["ServiceNo"])

    orderedByStop = {
        code: funnysort(list(services))
        for code, services in servicesByStop.items()
    }

    for code, stop in stops.items():
        # A fresh empty list per stop when no route mentions it.
        stop["Services"] = orderedByStop.get(code, [])
    return stops
if __name__ == "__main__":
    # Running the script does nothing by default. Uncomment the lines
    # below for the steps you want to run.
    pass

    # Bus routes: obtain the data (pick one source), then persist it.
    # routesetDict = downloadData("BusRoutes")
    # routesetDict = loadFromJson("routeset")
    # routesetDict = loadShelveAsDict("routeset")
    # saveAsJson(routesetDict, "routeset")
    # saveDictAsShelve(routesetDict, "routeset")

    # Bus stops: obtain the data (pick one source), then persist it.
    # stopsetDict = downloadData("BusStops")
    # stopsetDict = loadFromJson("stopset")
    # stopsetDict = loadShelveAsDict("stopset")
    # saveAsJson(stopsetDict, "stopset")
    # saveDictAsShelve(stopsetDict, "stopset")

    # Stops with their services: build or reload, then persist.
    # stopsetDictPlus = combineDicts(routesetDict, stopsetDict)
    # stopsetDictPlus = loadFromJson("stopsetplus")
    # stopsetDictPlus = loadShelveAsDict("stopsetplus")
    # saveAsJson(stopsetDictPlus, "stopsetplus")
    # saveDictAsShelve(stopsetDictPlus, "stopsetplus")