-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathadd_meetup_events.py
169 lines (144 loc) · 5.64 KB
/
add_meetup_events.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""
Install the requirements listed in requirements.txt into a virtualenv.
To preview the latest output for past-events/index.md, execute
$ python add_meetup_events.py
To overwrite past-events/index.md with the latest output, execute
$ python add_meetup_events.py --rewrite
Either way, pages for newly found events are written under past-events.
"""
import httpx
from bs4 import BeautifulSoup
from collections import defaultdict
from datetime import datetime
from slugify import slugify
from jinja2 import Environment
import sys
from pathlib import Path
# Absolute directory containing this script; the script builds paths into
# the past-events folder from it.
root = Path(__file__).parent.resolve()
# URL of the Meetup page listing the group's past events.
url = "https://www.meetup.com/bostonpython/events/past/"
# Jinja2 template for the markdown page saved under past-events for each
# event. It uses the variables title, event_date, contents (iterated, one
# entry per output line) and event_url.
MD = """---
title: {{ title }}
sidebar_link: false
---
{{ event_date }}
{% for c in contents %}
{{ c }}
{% endfor %}
Meetup link: [{{ event_url }}]({{ event_url }})
[Back to Past Events Page](index.md)
"""
# Leading lines (front matter and heading) of the past-events index. The
# per-year and per-event entries are appended after these, and the whole
# list is joined with newlines to construct the index.md file inside the
# past-events folder.
past_events = [
"---",
"title: Past Events",
"sidebar_link: false",
"---\n",
"List of past events:\n",
]
def read_front_matter_title(text):
    """Return the title declared in an event page, or None if there is none.

    Scans *text* line by line for the first line starting with ``title:``
    and returns everything after that prefix with surrounding whitespace
    removed, so a title that itself contains ": " is returned whole.
    """
    for line in text.split("\n"):
        if line.startswith("title:"):
            return line[len("title:"):].strip()
    return None


def event_datetime(item):
    """Return the datetime encoded in the <time> tag of a listing item.

    Only the first ten characters of the tag's ``datetime`` attribute are
    read, as whole epoch seconds (presumably the attribute holds epoch
    milliseconds -- confirm against the Meetup markup). The result is a
    naive datetime in the local timezone.
    """
    stamp = item.find("time")["datetime"]
    return datetime.fromtimestamp(int(stamp[:10]))


def build_index_entries(existing_events):
    """Return the markdown list lines for the past-events index.

    *existing_events* maps year -> date -> list of (title, filename)
    tuples. Years and dates are emitted newest first; events sharing a
    date are ordered by title.
    """
    entries = []
    for year in sorted(existing_events, reverse=True):
        entries.append(f"- {year}")
        by_day = existing_events[year]
        for day in sorted(by_day, reverse=True):
            for title, filename in sorted(by_day[day], key=lambda pair: pair[0]):
                entries.append(f" - [{title}]({filename}) ({day:%m/%d/%Y})")
    return entries


def main():
    """Fetch recent past events from Meetup and update the past-events folder.

    Steps:
      1. Download the past-events listing and group its items by year.
      2. Read the event pages already saved under past-events.
      3. Write a page for every listed event newer than the newest saved one.
      4. Print the regenerated index, or write it to past-events/index.md
         when ``--rewrite`` is on the command line.

    Raises:
        httpx.RequestError: re-raised after printing the failing URL.
    """
    # Every path is anchored at the script directory so the script behaves
    # the same regardless of the current working directory.
    events_dir = root / "past-events"

    try:
        # By default this request only returns the 10 most recent events.
        response = httpx.get(url)
    except httpx.RequestError as exc:
        print(f"An error occurred while requesting {exc.request.url!r}.")
        raise
    soup = BeautifulSoup(response.content.decode("utf-8", "ignore"), "lxml")

    # Group the listed events by year, keeping the page order in each year.
    datemap = defaultdict(list)
    for item in soup.find_all("li", {"class": "list-item border--none"}):
        datemap[event_datetime(item).year].append(item)

    # Structure of existing_events:
    # {
    #     2007: {
    #         datetime.date(2007, 5, 23): [
    #             ('The Cambridge Python May Meetup',
    #              '20070523-the-cambridge-python-may-meetup.md')
    #         ]
    #     }
    # }
    existing_events = defaultdict(lambda: defaultdict(list))
    for path in events_dir.glob("**/*"):
        if not path.is_file() or path.name == "index.md":
            continue
        # File names start with the event date as YYYYMMDD.
        event_day = datetime.strptime(path.name.split("-")[0], "%Y%m%d").date()
        # Fall back to the file stem so a page without a title still gets
        # a usable, sortable index entry.
        title = read_front_matter_title(path.read_text(encoding="utf-8")) or path.stem
        existing_events[event_day.year][event_day].append((title, path.name))

    if existing_events:
        last_processed_year = max(existing_events)
        max_processed_date = max(existing_events[last_processed_year])
    else:
        # Nothing saved yet: treat every listed event as new.
        max_processed_date = datetime.min.date()

    # Compile the page template once; it is the same for every event.
    template = Environment().from_string(MD)

    # Iterate over the data downloaded from Meetup and save whatever has
    # not been processed yet under past-events.
    for year, events in datemap.items():
        for event in events:
            link = event.find("a")
            event_path = link["href"]
            title = " - ".join(part.strip() for part in link.text.split(":"))
            if title == "Monday office hour":
                continue
            when = event_datetime(event)
            if when.date() <= max_processed_date:
                continue
            md_name = f"{when:%Y%m%d}-{slugify(title)}.md"

            location = event.find("div", {"class": "venueDisplay"})
            if not location:
                location = event.find("p", {"class": "venueDisplay"})

            # Drop presentation attributes so the paragraphs render as
            # plain HTML inside the markdown page.
            paragraphs = []
            for paragraph in event.find_all("p", {"class": "text--small"}):
                paragraph.attrs.pop("class", None)
                paragraph.attrs.pop("style", None)
                paragraphs.append(paragraph)

            context = {
                "title": title,
                "event_date": when.strftime("%B %d, %Y"),
                # The venue element may be missing from the listing.
                "location": location.text if location else "",
                "event_url": "https://www.meetup.com" + event_path,
                "contents": paragraphs,
            }
            # Write the event file to past-events, using the same encoding
            # the existing pages are read with.
            rendered = template.render(**context).replace("\xa0", "")
            (events_dir / md_name).write_text(rendered, encoding="utf-8")
            print(f"Write past-events/{md_name}")
            # Record the new page so it appears in the regenerated index.
            existing_events[year][when.date()].append((title, md_name))

    # Build the index from a copy so the module-level header list is not
    # mutated.
    index_lines = list(past_events)
    index_lines.extend(build_index_entries(existing_events))

    # Either print the index or overwrite past-events/index.md.
    if "--rewrite" in sys.argv:
        index_txt = "\n".join(index_lines).strip()
        (events_dir / "index.md").write_text(index_txt, encoding="utf-8")
    else:
        print("\n".join(index_lines))


if __name__ == "__main__":
    main()