fetch_instances.py
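"""Fetch the peer list of a Fediverse instance and record every peer in blocks.db.

Usage: python fetch_instances.py <domain>

Assumes a config.json providing a "useragent" string and an existing SQLite
database "blocks.db" with an "instances" table whose three columns hold the
domain, a SHA-256 hash of the domain, and the detected software type.
"""
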
from hashlib import sha256

import aiohttp
import asyncio
import json
import sqlite3
import sys


async def get_hash(domain: str) -> str:
    # SHA-256 hex digest of the domain; stored next to the domain in the instances table.
    return sha256(domain.encode("utf-8")).hexdigest()


async def get_peers(domain: str) -> list | None:
    # Ask the instance for its list of known peers (Mastodon-compatible API).
    try:
        async with aiohttp.ClientSession() as session:
            res = await session.get(f"https://{domain}/api/v1/instance/peers")
            return await res.json()
    except Exception as e:
        print(e)
        return None


async def get_type(instdomain: str) -> str | None:
    # Detect the instance's software via nodeinfo, falling back through the
    # endpoint variants different implementations expose. Known forks are
    # reported as their upstream family.
    try:
        async with aiohttp.ClientSession() as session:
            res = await session.get(f"https://{instdomain}/nodeinfo/2.1.json", headers=headers, timeout=5, allow_redirects=False)
            if res.status == 404:
                res = await session.get(f"https://{instdomain}/nodeinfo/2.0", headers=headers, timeout=5, allow_redirects=False)
            if res.status == 404:
                res = await session.get(f"https://{instdomain}/nodeinfo/2.0.json", headers=headers, timeout=5, allow_redirects=False)
            if res.ok and "text/html" in res.headers["content-type"]:
                res = await session.get(f"https://{instdomain}/nodeinfo/2.1", headers=headers, timeout=5, allow_redirects=False)
            if res.ok:
                try:
                    resj = await res.json()
                except aiohttp.ContentTypeError:
                    # Some instances serve nodeinfo with a wrong content type.
                    data = await res.read()
                    resj = json.loads(data)
                if resj["software"]["name"] in ["akkoma", "rebased"]:
                    return "pleroma"
                elif resj["software"]["name"] in ["hometown", "ecko"]:
                    return "mastodon"
                elif resj["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick", "firefish", "iceshrimp"]:
                    return "misskey"
                else:
                    return resj["software"]["name"]
            elif res.status == 404:
                # No nodeinfo at all: if the Mastodon instance API answers, assume Mastodon.
                res = await session.get(f"https://{instdomain}/api/v1/instance", headers=headers, timeout=5, allow_redirects=False)
                if res.ok:
                    return "mastodon"
    except Exception:
        return None


async def write_instance(instance: str, c) -> bool:
    # Record the peer in the instances table if it is not already present.
    try:
        c.execute(
            "select domain from instances where domain = ?", (instance,)
        )
        if c.fetchone() is None:
            InstHash = await get_hash(instance)
            InstType = await get_type(instance)
            c.execute(
                "insert into instances select ?, ?, ?",
                (instance, InstHash, InstType),
            )
            conn.commit()
    except Exception as e:
        print("error:", e, instance)
    return True


async def main():
    global config
    global headers
    global domain
    global conn
    # headers and conn are read by get_type() and write_instance(), hence module-level globals.
    with open("config.json") as f:
        config = json.load(f)
    headers = {"user-agent": config["useragent"]}
    domain = sys.argv[1]
    conn = sqlite3.connect("blocks.db")
    c = conn.cursor()
    peerlist = await get_peers(domain)
    if peerlist is None:
        print(f"could not get peers for {domain}")
        conn.close()
        return
    # Peer domains to skip (substring match).
    blacklist = [
        "activitypub-troll.cf",
        "gab.best",
        "4chan.icu",
        "social.shrimpcam.pw",
        "mastotroll.netz.org",
        "github.dev",
        "ngrok.io",
    ]
    # asyncio.TaskGroup (Python 3.11+) waits for all write tasks before continuing.
    async with asyncio.TaskGroup() as tg:
        for peer in peerlist:
            peer = peer.lower()
            if any(blocked in peer for blocked in blacklist):
                continue
            tg.create_task(write_instance(peer, c))
    conn.close()
    print(f"done with {domain}")


if __name__ == "__main__":
    asyncio.run(main())
    sys.exit()