-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlink_status.py
74 lines (59 loc) · 1.94 KB
/
link_status.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import config as cfg
import pymysql
import requests
import sys
from ThreadPool import ThreadPool
# Path of the tab-separated results log: truncated and given a header row by
# the script body, then appended to by output_status().
log_filename = "url_status.log"
# Timeout value handed to requests.get() for every link check
# (seconds, per the requests API).
timeout = 10.00
# Shared pool that runs the check_url tasks queued by the script body.
# NOTE(review): 8 is presumably the number of worker threads - confirm
# against the ThreadPool module.
pool = ThreadPool(8)
def check_url(url):
    """Request ``url`` and log the HTTP status code it answers with.

    A GET request is sent (redirects are followed) with a browser-like
    User-Agent, and the resulting status code is passed to
    ``output_status``. Any exception raised along the way is logged as
    status ``0`` together with the exception text instead of propagating.

    Args:
        url: The URL to request.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
    }
    try:
        # stream=True defers the body download, which is never needed here.
        # Because the body is never consumed, the response has to be closed
        # explicitly or its connection is not released; the ``with`` block
        # takes care of that on every exit path.
        with requests.get(
            url, stream=True, allow_redirects=True, headers=headers, timeout=timeout
        ) as r:
            output_status(url, r.status_code)
    except Exception as ex:
        # Best-effort: a failing link is recorded, not raised.
        output_status(url, 0, str(ex))
def get_externallinks(cursor):
    """Fetch every distinct ``el_to`` value from the ``externallinks`` table.

    Args:
        cursor: Database cursor providing ``execute`` and ``fetchall``.

    Returns:
        The result of ``cursor.fetchall()`` for the query; the row format
        depends on the cursor that was passed in.

    Raises:
        SystemExit: If executing the query or fetching its rows raises any
            ``Exception``; the exit message carries the exception text.
    """
    query = "SELECT DISTINCT `el_to` FROM externallinks"
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
    except Exception as err:
        sys.exit("Error retrieving external links: " + str(err))
    return rows
def get_wikidb():
    """Open a connection to the wiki's MySQL database.

    Connection settings are read from ``cfg.mysql`` (keys ``server``,
    ``db``, ``port``, ``user`` and ``pass``). The connection uses the
    ``utf8mb4`` charset and ``pymysql.cursors.DictCursor`` as its cursor
    class.

    Returns:
        The connection object returned by ``pymysql.connect``.

    Raises:
        SystemExit: If reading the settings or connecting raises any
            ``Exception``; the exit message includes the underlying error.
    """
    try:
        return pymysql.connect(
            host=cfg.mysql["server"],
            db=cfg.mysql["db"],
            port=int(cfg.mysql["port"]),
            user=cfg.mysql["user"],
            password=cfg.mysql["pass"],
            charset="utf8mb4",
            cursorclass=pymysql.cursors.DictCursor,
        )
    except Exception as ex:
        # Deliberately not a bare ``except``: Ctrl-C (KeyboardInterrupt) and
        # SystemExit must not be reported as a connection problem. The cause
        # is appended so a bad key, a bad port and a refused connection can
        # be told apart.
        sys.exit(
            "Error connecting to database - verify connection information in configuration: "
            + str(ex)
        )
def output_status(url, status, error=""):
    """Append one tab-separated result line to the log file.

    The line is ``status<TAB>url``; when ``error`` is not the empty string
    it is ``status<TAB>url<TAB>error``. The file named by the module-level
    ``log_filename`` is opened in append mode for each call.

    Args:
        url: The URL that was checked.
        status: Status value to record for the URL.
        error: Optional error text; omitted from the line when empty.
    """
    if error == "":
        result = "%s\t%s" % (status, url)
    else:
        result = "%s\t%s\t%s" % (status, url, error)
    # Explicit UTF-8: with the platform default encoding a non-ASCII URL or
    # error message can raise UnicodeEncodeError on non-UTF-8 locales.
    with open(log_filename, "a", encoding="utf-8") as f:
        print(result, file=f)
# Script body: load the external links from the wiki database, reset the log
# file, then check every link on the shared thread pool.
mysqldb = get_wikidb()
try:
    with mysqldb.cursor() as cursor:
        externallinks = get_externallinks(cursor)
finally:
    # The rows are already fetched at this point, so the connection is
    # released before the (potentially long) link checking starts, and it is
    # also closed when fetching the links fails.
    mysqldb.close()

# Start a fresh log containing only the header row; output_status() appends
# one line per checked URL afterwards.
with open(log_filename, "w") as f:
    print("Status\tURL\tError", file=f)

for link in externallinks:
    # The code treats el_to as bytes: decode it and trim surrounding
    # whitespace before queueing the check.
    url = link["el_to"].decode("utf-8").strip()
    pool.add_task(check_url, url)

# Block until every queued check has finished.
pool.wait_completion()