forked from Freekers/whitelist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchecker.py
79 lines (65 loc) · 2.35 KB
/
checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import requests
import os
import shutil
import logging
# Set up logging: only ERROR-level (and more severe) records are written, and
# "error.log" is opened in "w" mode so each run starts with an empty log file.
logging.basicConfig(
    filename="error.log",
    filemode="w",
    level=logging.ERROR,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
def check_url(url):
    """Report whether *url* answers with a non-error HTTP status.

    A HEAD request is tried first (5 second timeout, redirects followed).
    Servers that reject the HEAD method itself (405 Method Not Allowed or
    501 Not Implemented) are retried with a streamed GET, so a reachable
    host is not discarded just because it does not implement HEAD.

    Args:
        url: Absolute URL, including scheme, to probe.

    Returns:
        True if the final status code is below 400, otherwise False.
        Any ``requests`` exception is logged at ERROR level and also
        yields False.
    """
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
        if response.status_code in (405, 501):
            # The server refused HEAD, not the URL: retry with GET.
            # stream=True avoids downloading the response body.
            response.close()
            response = requests.get(
                url, timeout=5, allow_redirects=True, stream=True
            )
        status_code = response.status_code
        response.close()
    except requests.exceptions.RequestException as e:
        logging.error("Error accessing %s: %s", url, e)
        print(f"Failed to access URL: {url}")
        return False

    if status_code < 400:
        print(f"URL resolves: {url} - Status code: {status_code}")
        return True
    print(f"URL does not resolve: {url} - Status code: {status_code}")
    return False
def _url_for_entry(line):
    """Return the URL to probe for one list line, or None if it is skipped.

    Blank lines and lines starting with "#" or "/" are skipped. Entries that
    already carry an explicit http:// or https:// scheme are used as-is;
    anything else is treated as a bare host and prefixed with "http://".
    """
    entry = line.strip()
    if not entry or entry.startswith(("#", "/")):
        return None
    # Test for a real scheme prefix: a plain startswith("http") would also
    # match bare domains such as "httpbin.org" and leave them without a scheme.
    if entry.lower().startswith(("http://", "https://")):
        return entry
    return f"http://{entry}"


def process_file(file_path, output_dir):
    """Copy the reachable entries of one list file into *output_dir*.

    Every line of *file_path* that is not blank and does not start with "#"
    or "/" is turned into a URL and checked with ``check_url``. Lines whose
    URL passes are written unchanged to a file with the same base name inside
    *output_dir*; all other lines (including comments) are left out.

    Args:
        file_path: Path of the list file to read.
        output_dir: Directory for the filtered copy; created if missing.
    """
    print(f"Processing file: {file_path}")
    with open(file_path, "r") as file:
        lines = file.readlines()

    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, os.path.basename(file_path))
    with open(output_file_path, "w") as file:
        for line in lines:
            url = _url_for_entry(line)
            if url is not None and check_url(url):
                # Write the original line so its formatting is preserved.
                file.write(line)
def process_all_files(folder, output_folder):
    """Run ``process_file`` on every ``.txt`` file directly inside *folder*.

    Args:
        folder: Directory containing the list files.
        output_folder: Directory that receives the filtered copies.
    """
    # Create the output folder up front so it exists even when no file
    # matches; code that lists it afterwards would otherwise hit
    # FileNotFoundError.
    os.makedirs(output_folder, exist_ok=True)
    # sorted() gives a stable processing order; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        # Skip directories that merely happen to be named "*.txt".
        if filename.endswith(".txt") and os.path.isfile(file_path):
            process_file(file_path, output_folder)
def sync_files(source_dir, backup_dir):
    """Copy everything under *source_dir* into *backup_dir*.

    Regular files are copied with ``shutil.copy2``, overwriting files of the
    same name. Real subdirectories are copied recursively instead of making
    the copy fail. Entries that are neither (for example broken symlinks or
    symlinked directories) are skipped with a notice. Files that exist only
    in *backup_dir* are left alone.

    Args:
        source_dir: Directory to copy from; must exist.
        backup_dir: Directory to copy into; created if missing.
    """
    print(f"Syncing files from {source_dir} to {backup_dir}")
    _copy_tree(source_dir, backup_dir)
    print("Backup complete.")


def _copy_tree(source_dir, backup_dir):
    """Recursively copy the entries of *source_dir* into *backup_dir*."""
    os.makedirs(backup_dir, exist_ok=True)
    for file_name in os.listdir(source_dir):
        source_path = os.path.join(source_dir, file_name)
        backup_path = os.path.join(backup_dir, file_name)
        # Do not descend into symlinked directories, to avoid cycles.
        if os.path.isdir(source_path) and not os.path.islink(source_path):
            _copy_tree(source_path, backup_path)
        elif os.path.isfile(source_path):
            shutil.copy2(source_path, backup_path)
        else:
            print(f"Skipping non-regular entry: {source_path}")
# Directory setup (paths are relative to the current working directory):
# list files are read from input_folder, filtered copies are written to
# output_folder, and output_folder is then copied into backup_folder.
input_folder = "domains"
output_folder = "cleaned"
backup_folder = "backup"

# Start processing: filter every list first, then back up the results.
print("Starting the URL checking process...")
process_all_files(folder=input_folder, output_folder=output_folder)
sync_files(source_dir=output_folder, backup_dir=backup_folder)
print("Processing completed.")