-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetWOTD.py
executable file
·49 lines (34 loc) · 1.09 KB
/
getWOTD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
import os
from urllib import urlopen
from datetime import date, timedelta
from threading import Thread
# Each page URL is `prefix` followed by the day formatted as YYYY/MM/DD/, e.g.
#   http://www.merriam-webster.com/word-of-the-day/2011/11/01/
prefix = 'http://www.merriam-webster.com/word-of-the-day/'

# Date window walked by the download loop (via date_range()).
start = date(year=2006, month=9, day=1)
end = date.today()

# Fixed-size list of worker-thread slots; `thread` is the index of the slot
# the download loop will use next.
nthreads = 20
threads = nthreads * [None]
thread = 0
def get_wotd(day, filename):
    """Download the page for `day` and save it to `filename`.

    Args:
        day: object with a ``strftime`` method (a ``datetime.date`` in this
            script); its ``%Y/%m/%d/`` rendering is appended to ``prefix``
            to form the URL.
        filename: path of the file to create or overwrite with the
            downloaded content.

    Any error raised while fetching or writing propagates to the caller.
    The page is fully read before the output file is opened, so a failed
    download does not leave an empty file behind.
    """
    response = urlopen(prefix + day.strftime('%Y/%m/%d/'))
    try:
        content = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()

    # Binary mode: the payload is raw bytes from the server and must be
    # stored without newline translation.
    with open(filename, 'wb') as f:
        f.write(content)

    # Single pre-formatted string: prints the same text as before and is
    # valid under both the print statement and the print function.
    print('Wrote: %s' % filename)
def date_range(start, end):
    """Yield consecutive days from `start` up to, but not including, `end`.

    Produces ``start + k days`` for ``k = 0 .. (end - start).days - 1``.
    Nothing is yielded when that day count is zero or negative.
    """
    span = (end - start).days
    offset = 0
    while offset < span:
        yield start + timedelta(days=offset)
        offset += 1
# Walk every day in [start, end) and fetch the pages that are not on disk yet.
# `threads` is used as a ring of `nthreads` worker slots: before a slot is
# reused, the thread previously started in it is joined, which caps the number
# of downloads in flight at `nthreads`.
for day in date_range(start, end):
    month_dir = day.strftime('%Y/%m')
    filename = month_dir + day.strftime('/wotd-%Y%m%d.html')

    # Already downloaded on an earlier run: skip without waiting on a slot.
    if os.path.exists(filename):
        continue

    # Wait for the previous occupant of this slot to finish.
    if threads[thread] is not None:
        threads[thread].join()

    try:
        os.makedirs(month_dir)
    except OSError:
        # An existing directory is the expected case; anything else
        # (permissions, a file in the way, ...) is a real error.
        if not os.path.isdir(month_dir):
            raise

    threads[thread] = Thread(target=get_wotd, args=[day, filename])
    threads[thread].start()

    # Advance to the next slot, wrapping around at the end of the pool.
    thread = (thread + 1) % nthreads