-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscdd.py
126 lines (120 loc) · 3.59 KB
/
scdd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python
#coding: utf-8
#
# scdd.py daemon process
#
# author: observer
# email: [email protected]
# blog: http://obmem.com
# last edit @ 2009.12.19
import os,sys,time
import re
from daemon import Daemon
import sqlite3
import fetchvc
from download import httpfetch
from Queue import Queue
from threading import Thread
class MyDaemon(Daemon):
    """Daemon that collects VeryCD topic ids and hands them to fetch workers.

    ``run`` polls several sources -- the on-disk ``searchqueue`` file, the
    site feed and, in two short daily windows, the home page and the listing
    pages -- and pushes every topic id it finds onto ``self.q``.  Worker
    threads drain that queue and call ``fetchvc.fetch`` for each id.

    NOTE(review): every timestamp here is ``time.mktime(time.gmtime())``.
    ``mktime`` interprets its argument as local time, so on a host whose
    timezone is not UTC the time-of-day windows are shifted by the local UTC
    offset -- confirm the intended wall-clock times.
    """

    # Number of worker threads started by run().
    _WORKER_COUNT = 8
    # Seconds to sleep when the search queue is empty or a pass fails.
    _POLL_SECONDS = 10
    # The feed is fetched when now % _FEED_PERIOD is below _POLL_SECONDS.
    # NOTE(review): older comments/debug output mention 900 s while the
    # condition has always used 800; the condition's value is kept -- confirm.
    _FEED_PERIOD = 800
    # Number of listing pages walked by the daily full crawl (pages 1..N).
    _LISTING_PAGES = 20

    # Patterns are compiled once instead of on every polling pass.
    _TOPIC_RE = re.compile(r'/topics/(\d+)', re.DOTALL)
    # From the literal section heading up to the first closing </dl>.
    _HOT_ZONE_RE = re.compile(r'热门资源.*?</dl>', re.DOTALL)
    _HOT_LINK_RE = re.compile(
        r'<a href="/topics/(\d+)/"[^>]*>(《.*?》)[^<]*</a>', re.DOTALL)
    _LISTING_RE = re.compile(r'"topic-list"(.*?)"pnav"', re.DOTALL)

    def __init__(self, path, pid):
        """Remember the working directory and create the shared topic queue.

        path -- directory holding the database, ``searchqueue`` and
                ``static/hot.html``.
        pid  -- passed through unchanged to ``Daemon.__init__``.
        """
        self.path = path
        self.q = Queue()
        Daemon.__init__(self, pid)

    def thread_fetch(self):
        """Worker loop: take topic ids off the queue and fetch each one.

        Each worker opens its own SQLite connection and passes it to
        ``fetchvc.fetch``.  This method never returns.
        """
        conn = sqlite3.connect(self.path + '/verycd.sqlite3.db')
        conn.text_factory = str
        while True:
            topic = self.q.get()
            try:
                fetchvc.fetch(topic, conn)
            except Exception as err:
                # Best effort: one failing topic must not kill the worker,
                # but the failure is reported instead of silently dropped.
                print('fetching topic %s failed: %r' % (topic, err))
            finally:
                self.q.task_done()

    def run(self):
        """Start the worker threads, then poll every topic source forever."""
        for _ in range(self._WORKER_COUNT):
            worker = Thread(target=self.thread_fetch)
            worker.setDaemon(True)
            worker.start()

        # Not used by the polling loop itself; opening it here makes an
        # unusable database path raise before the loop starts.
        conn = sqlite3.connect(self.path + '/verycd.sqlite3.db')
        conn.text_factory = str

        while True:
            try:
                self._poll_once()
            except Exception as err:
                # Any failure abandons the rest of this pass; back off and
                # retry.  KeyboardInterrupt/SystemExit are not trapped.
                print('polling pass failed: %r' % (err,))
                time.sleep(self._POLL_SECONDS)

    def _now(self):
        """Return the timestamp used for all scheduling decisions."""
        return time.mktime(time.gmtime())

    def _enqueue(self, topics):
        """Put each distinct topic id from *topics* on the worker queue."""
        for topic in set(topics):
            self.q.put(topic)

    def _poll_once(self):
        """Run one pass over every topic source."""
        # Sleep only when the search queue was empty, so queued searches are
        # picked up again immediately after a busy pass.
        if not self._drain_search_queue():
            time.sleep(self._POLL_SECONDS)

        if self._now() % self._FEED_PERIOD < self._POLL_SECONDS:
            self._poll_feed()

        timeofday = self._now() % 86400
        # 68400 s = 19:00 and 69000 s = 19:10 on the clock described in the
        # class docstring; each window is 10 s wide.
        if 68400 < timeofday < 68410:
            self._refresh_hot()
        if 69000 < timeofday < 69010:
            self._crawl_listing()

    def _drain_search_queue(self):
        """Run every queued search and enqueue the topics it returns.

        Returns True when the queue file held at least one line.
        """
        queue_file = self.path + '/searchqueue'
        with open(queue_file, 'r') as handle:
            taskqueue = handle.readlines()
        now = self._now()
        print('%s %s %s' % (taskqueue, now, now % self._FEED_PERIOD))
        # Truncate the file now that its content has been read.
        # NOTE(review): a task appended between the read and this truncate
        # is lost; nothing in this class locks the file.
        with open(queue_file, 'w'):
            pass

        for line in taskqueue:
            # readlines() keeps the line terminator, which must not end up
            # in the URL; blank lines carry no search term.
            task = line.rstrip('\r\n')
            if not task:
                continue
            url = 'http://www.verycd.com/search/folders/' + task
            print('fetching %s ...' % url)
            res = httpfetch(url)
            print('...fetching completed')
            self._enqueue(self._TOPIC_RE.findall(res))
        return bool(taskqueue)

    def _poll_feed(self):
        """Fetch the site feed and enqueue every topic id it mentions."""
        print('fetching feed ...')
        feeds = httpfetch('http://www.verycd.com/sto/feed')
        topics = set(self._TOPIC_RE.findall(feeds))
        print(topics)
        self._enqueue(topics)

    def _refresh_hot(self):
        """Enqueue the home page's hot topics and rewrite static/hot.html."""
        print('fetching homepage ...')
        home = httpfetch('http://www.verycd.com/')
        zone = self._HOT_ZONE_RE.search(home)
        if zone is None:
            # Page layout changed or the fetch returned something else;
            # keep the previous hot.html rather than failing the pass.
            print('hot section not found on homepage')
            return
        hot = self._HOT_LINK_RE.findall(zone.group())

        parts = ['<h2 style="color:red">每日热门资源</h2>\n']
        for topic_id, title in hot:
            print('fetching hot topic %s ...' % topic_id)
            self.q.put(topic_id)
            parts.append(' <a target="_parent" href="/?id=%s">%s</a> \n'
                         % (topic_id, title))
        with open(self.path + '/static/hot.html', 'w') as handle:
            handle.write(''.join(parts))

    def _crawl_listing(self):
        """Walk listing pages 1.._LISTING_PAGES and enqueue their topics."""
        urlbase = 'http://www.verycd.com/sto/~all/page'
        for page in range(1, self._LISTING_PAGES + 1):
            print('fetching list %s ...' % page)
            res = httpfetch(urlbase + str(page))
            sections = self._LISTING_RE.findall(res)
            if not sections:
                # No topic list on this page; try the next one.
                continue
            topics = set(self._TOPIC_RE.findall(sections[0]))
            print(topics)
            self._enqueue(topics)
if __name__ == "__main__":
    # Command-line entry point.  The daemon works out of the directory that
    # contains this script and records its pid in /tmp/simplevc.pid.
    path = os.path.dirname(os.path.realpath(sys.argv[0]))
    daemon = MyDaemon(path=path, pid='/tmp/simplevc.pid')

    # Exactly one command word is expected after the script name.
    if len(sys.argv) != 2:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)

    command = sys.argv[1]
    # Each accepted command maps to the daemon method of the same name;
    # 'run' calls the polling loop directly in the current process.
    if command not in ('start', 'stop', 'restart', 'run'):
        print("Unknown command")
        sys.exit(2)

    getattr(daemon, command)()
    sys.exit(0)