-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path__init__.py
77 lines (67 loc) · 1.97 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import re
import requests
import PyPDF2
import csv
from fixtures import centers
# PyPDF2 can't read a requests response stream so write the file to disk.
# The timeout keeps the script from hanging indefinitely on a stalled
# connection, and raise_for_status() prevents an HTTP error page from being
# saved to disk as if it were the PDF.
response = requests.get(
    "https://hhinternet.blob.core.windows.net/wait-times/testing-wait-times.pdf",
    timeout=30,
)
response.raise_for_status()
with open("testing-wait-times.pdf", "wb") as f:
    f.write(response.content)
# Reopen that same file and split the text of its first page into lines.
# Everything that touches the reader stays inside the `with` block so the
# handle is still open while the page is extracted and is closed afterwards.
# NOTE(review): PdfFileReader/getPage/extractText are the legacy PyPDF2 names;
# confirm the pinned PyPDF2 version still provides them.
with open("testing-wait-times.pdf", "rb") as file:
    pdfReader = PyPDF2.PdfFileReader(file)
    page = pdfReader.getPage(0)
    lines = page.extractText().split("\n")
import logging

# In the current formatting, the terminator is a date (e.g. 7/14/2020).
date_pattern = re.compile(r"\d{1,2}/\d{1,2}/\d{4}")

cursor = 0
rows = []
# Using a while loop because the offsets are different for each entry.
# The loop is bounded by len(lines) so a page without the terminating date
# ends the scan instead of raising IndexError.
while cursor < len(lines):
    if date_pattern.match(lines[cursor]):
        break
    # Every entry has at least a place line followed by a wait-time line.
    if cursor + 1 >= len(lines):
        logging.warning("Incomplete entry at line %d", cursor)
        break
    place = lines[cursor]
    wait_time = lines[cursor + 1]
    # If the wait time is not reported, there is no line for it so update the
    # offset by one fewer.
    if wait_time == "Not Reported Yet":
        last_reported = None
        cursor += 3
    elif cursor + 3 < len(lines):
        last_reported = lines[cursor + 3]
        cursor += 4
    else:
        logging.warning("Incomplete entry at line %d", cursor)
        break

    center = centers.get(place)
    if center is not None:
        address = center.get("address")
        boro = center.get("boro")
        fullname = center.get("fullname", place)
        # Missing coordinates stay empty rather than falling back to the
        # place name, which is not a meaningful latitude/longitude.
        lat = center.get("lat")
        lng = center.get("lng")
    else:
        logging.warning("Center not found %s ", place)
        # Reset every per-center field, including lat/lng, so an unknown
        # center neither raises NameError nor inherits the previous
        # center's coordinates.
        address = None
        boro = None
        fullname = None
        lat = None
        lng = None

    rows.append({
        "location": place,
        "fullname": fullname,
        "address": address,
        "lat": lat,
        "lng": lng,
        "borough": boro,
        "wait_time": wait_time,
        "last_reported": last_reported,
    })
# Write the collected rows to wait-times.csv with a header row.
# The file is opened in a `with` block so it is flushed and closed once the
# rows are written, and with newline="" as the csv module requires so that
# no extra blank lines appear on platforms that translate line endings.
with open("wait-times.csv", "w", newline="") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=[
        "fullname",
        "wait_time",
        "last_reported",
        "address",
        "lat",
        "lng",
        "borough",
        "location",
    ])
    writer.writeheader()
    writer.writerows(rows)