# analyzeLogFiles.py
import os
import re
import subprocess

import pandas as pd
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one is present.
load_dotenv()
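
# Assumed layout (based on process_logs below): each input CSV in the logs
# directory has a pre-existing report file named report_<csv_name> in the
# output directory; a ",<task_id>" column is appended to each report line.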

def list_csv_files(directory):
    """List all CSV files in the given directory."""
    return [f for f in os.listdir(directory) if f.endswith('.csv')]

def extract_urls_from_csv(file_path):
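    """Scan every cell of a CSV file and return a list of the URLs found."""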
    # Read the CSV file.
    data = pd.read_csv(file_path)
    # Regular expression for matching URLs. (The parentheses in the character
    # class need a single backslash escape, not a doubled one.)
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    # List to store URLs.
    urls = []
    # Iterate over each cell in the DataFrame.
    for _, row in data.iterrows():
        for item in row:
            # Find all URLs in the current cell and add them to the list.
            found_urls = re.findall(url_pattern, str(item))
            urls.extend(found_urls)
    return urls

def submit_url_to_cuckoo(url):
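    """Submit a URL to Cuckoo Sandbox via the `cuckoo submit` CLI.

    Returns the assigned task ID, or None if submission failed.
    """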
    command = ["cuckoo", "submit", "--url", url]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode == 0:
            print(f"Successfully submitted URL: {url}")
            print(result.stdout)
            return extract_task_id_from_result(result.stdout)
        else:
            print(f"Error submitting URL: {url}")
            print(result.stderr)
            return None
    except Exception as e:
        print(f"Exception occurred while submitting URL: {e}")
        return None

def extract_task_id_from_result(result):
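    """Parse Cuckoo's submit output for 'task with ID #<number>' and return the ID."""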
    # Regular expression to match 'task with ID #<number>'.
    match = re.search(r"task with ID #(\d+)", result)
    if match:
        # Extract and return the task ID.
        return int(match.group(1))
    else:
        print("Task ID not found in the output.")
        return None

def process_logs(input_directory, output_directory):
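    """For each CSV log in input_directory, submit its URLs to Cuckoo and
    append the resulting task IDs to the matching report file in
    output_directory.
    """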
    for csv_file in list_csv_files(input_directory):
        file_path = os.path.join(input_directory, csv_file)
        urls = extract_urls_from_csv(file_path)
        task_ids = []
        for url in urls:
            task_id = submit_url_to_cuckoo(url)
            if task_id is not None:
                print(f"Task ID for URL {url}: {task_id}")
                task_ids.append(str(task_id))
        # Read the existing report file, append a task ID to each line, and
        # rewrite it. Note that zip() stops at the shorter sequence, so report
        # lines without a matching task ID (or vice versa) are dropped.
        report_file_path = os.path.join(output_directory, f"report_{csv_file}")
        if not os.path.exists(report_file_path):
            print(f"Report file not found, skipping: {report_file_path}")
            continue
        with open(report_file_path, 'r') as f:
            lines = f.readlines()
        with open(report_file_path, 'w') as f:
            for line, task_id in zip(lines, task_ids):
                f.write(f"{line.strip()},{task_id}\n")

if __name__ == "__main__":
    # Example usage: scan CSVs in ./logs and update the reports in ./logReports.
    input_directory = 'logs'
    output_directory = 'logReports'
    process_logs(input_directory, output_directory)