This repository was archived by the owner on Aug 24, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcaddy2ncsa.py
executable file
·160 lines (134 loc) · 5.26 KB
/
caddy2ncsa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
import json, sys, getopt, tarfile, tempfile, os, gzip
from datetime import datetime
# Version of this converter; it is printed in the startup banner.
version = "1.3.0"
def get_log_tar(filename):
    """Extract a tar archive and parse every file in it as a Caddy JSON log.

    The archive is unpacked into a temporary directory that is removed again
    before the function returns.

    Args:
        filename: Path of the tar archive. It is opened with mode ``"r"``,
            which lets ``tarfile`` detect compressed archives itself.

    Returns:
        A list holding the parsed log entries of all extracted files.
    """
    logs = []
    with tempfile.TemporaryDirectory() as folder:
        print("TAR: Using temporary folder '{}'".format(folder))
        print("TAR: Extracting '{}'".format(filename))
        with tarfile.open(filename, "r") as tar:
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would escape the destination folder
                # (absolute paths, "..", unsafe links) where supported.
                tar.extractall(path=folder, filter="data")
            else:
                tar.extractall(path=folder)
        # Walk the tree so archives that contain sub-directories work too;
        # paths are kept relative to the extraction folder.
        files = sorted(
            os.path.relpath(os.path.join(root, name), folder)
            for root, _dirs, names in os.walk(folder)
            for name in names
        )
        print("TAR: Got files: {}".format(files))
        for file in files:
            fileName = os.path.join(folder, file)
            print("TAR: Opening File '{}'".format(fileName))
            logs.extend(get_log_normal(fileName))
    return logs
def get_log_gzip(filename):
    """Parse a gzip-compressed Caddy JSON log.

    The compressed file is decoded as a text stream and handed straight to
    ``get_log_file``; no temporary copy of the uncompressed data is written.

    Args:
        filename: Path of the gzip-compressed log file.

    Returns:
        The list of parsed log entries.
    """
    print("GZIP: Reading Compressed '{}'".format(filename))
    # The ``with`` block guarantees the handle is released even when parsing
    # fails; closing an already closed file is harmless.
    with gzip.open(filename, "rt", encoding="utf-8") as file:
        print("LOG: Reading JSON '{}'".format(filename))
        return get_log_file(file)
def get_log_normal(filename):
    """Parse an uncompressed Caddy JSON log file.

    Args:
        filename: Path of the log file.

    Returns:
        The list of parsed log entries.
    """
    print("LOG: Reading JSON '{}'".format(filename))
    # JSON text is UTF-8, so do not depend on the locale's default encoding.
    # The ``with`` block closes the handle even if parsing raises.
    with open(filename, "r", encoding="utf-8") as file:
        return get_log_file(file)
def get_log_file(file):
    """Parse an open structured log that holds one JSON document per line.

    Blank lines (for example a trailing empty line) are skipped.

    Args:
        file: Readable text file object. It is rewound to the beginning when
            it is seekable and is always closed before this function returns.

    Returns:
        A list with the decoded entry of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        if file.seekable():
            file.seek(0)
        # Decode line by line in a single pass instead of concatenating the
        # whole file into one big JSON array string.
        return [json.loads(line) for line in file if line.strip()]
    finally:
        file.close()
def get_log(filename):
    """Load the log entries of *filename* with the reader that fits the file.

    Tar archives are recognised through ``tarfile.is_tarfile``; any other
    file whose name ends in ``gz`` goes to the gzip reader; everything else
    is read as a plain log file.
    """
    if tarfile.is_tarfile(filename):
        reader = get_log_tar
    elif filename.endswith("gz"):
        reader = get_log_gzip
    else:
        reader = get_log_normal
    return reader(filename)
def get_element(elements, element, first=True, default=""):
    """Look up *element* in *elements*, falling back to *default*.

    Args:
        elements: Container to read from, normally a dict decoded from JSON.
            Values that do not support the lookup (``None``, a string that
            stands in for a missing parent, ...) simply yield *default*.
        element: Key to look up.
        first: When true and the stored value is a list, return only its
            first item.
        default: Value returned when the element is missing, the container
            cannot be searched, or the stored list is empty.

    Returns:
        The stored value, its first item, or *default*.
    """
    try:
        if element not in elements:
            return default
        value = elements[element]
    except (TypeError, KeyError, IndexError):
        # The container does not support this kind of lookup.
        return default
    if first and isinstance(value, list):
        # An empty list has no first item to hand out.
        return value[0] if value else default
    return value
def _strip_port(address):
    """Return the host part of a ``host:port`` or ``[ipv6]:port`` string."""
    if address.startswith("["):
        # Bracketed IPv6 literal: keep what is between the brackets.
        return address[1:].partition("]")[0]
    if address.count(":") == 1:
        return address.partition(":")[0]
    # No port present, or a bare IPv6 address: nothing to strip.
    return address


def write_common_log(logs, filename):
    """Write parsed Caddy log entries to *filename* in NCSA vhost format.

    Args:
        logs: Iterable of decoded Caddy log entries.
        filename: Path of the output file; it is overwritten.

    The timestamp is rendered in UTC and labelled with the matching
    ``+0000`` offset.
    """
    # Imported here because the module itself only imports ``datetime``.
    from datetime import timezone

    print("CLW: Writing NCSA log '{}'".format(filename))
    with open(filename, "w", encoding="utf-8") as file:
        for log in logs:
            timestamp = get_element(log, "ts")
            size = get_element(log, "size")
            status = get_element(log, "status")
            request = get_element(log, "request")
            remoteAddress = _strip_port(get_element(request, "remote_addr"))
            uri = get_element(request, "uri")
            protocol = get_element(request, "proto")
            method = get_element(request, "method")
            headers = get_element(request, "headers")
            userAgent = get_element(headers, "User-Agent")
            referer = get_element(headers, "Referer")
            realIp = get_element(headers, "X-Forwarded-For", default=None)
            # This is required to resolve the real IP of the client behind Cloudflare/Proxies
            if realIp is not None:
                # The header may list several hops ("client, proxy1, ...");
                # the first one is the client, and a space-separated list
                # would break the host field of the output line.
                forwarded = realIp.split(",")[0].strip()
                if forwarded:
                    remoteAddress = forwarded
            tls = get_element(request, "tls")
            serverName = get_element(tls, "server_name")
            timestamp = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(
                '%d/%b/%Y:%H:%M:%S %z')
            # Below is the NCSA vhost format, we transform the Caddy log into this so that goaccess gets the most data
            # %v:%^ %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
            file.write("{}:443 {} [{}] \"{} {} {}\" {} {} \"{}\" \"{}\"\n".format(
                serverName, remoteAddress, timestamp, method, uri, protocol, status, size, referer, userAgent))
def main(argv):
    """Parse the command line, read all input logs and write the NCSA output.

    Args:
        argv: Command-line arguments without the program name.

    Exits with status 2 when the options are invalid or no output file was
    given, and with status 0 after printing the help text.
    """
    usageString = "{} -o <outputFile> [-i <inputFile>, -d <inputDir>,..]".format(sys.argv[0])
    inputFiles = []
    outputFile = ''
    try:
        opts, _args = getopt.getopt(
            argv, "hi:o:d:", ["help", "inputfile=", "outputfile=", "inputdir="])
    except getopt.GetoptError:
        print(usageString)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usageString)
            print("\nAccepted Compressed files: GZIP, TAR, BZ2, LZMA")
            print("Accepted Uncompressed files: Caddy 2 Structured Log")
            sys.exit()
        elif opt in ("-i", "--inputfile"):
            inputFiles.append(arg)
        elif opt in ("-o", "--outputfile"):
            outputFile = arg
        elif opt in ("-d", "--inputdir"):
            for name in sorted(os.listdir(arg)):
                path = os.path.join(arg, name)
                # Sub-directories cannot be read as logs; only take files.
                if os.path.isfile(path):
                    inputFiles.append(path)
    if not outputFile:
        # Without an output file there is nothing sensible to do.
        print(usageString)
        sys.exit(2)
    print("Input Files: {}\nOutput File: {}".format(inputFiles, outputFile))
    fullLog = []
    # Compare absolute paths so the output file is skipped as an input no
    # matter how its path was spelled on the command line.
    outputPath = os.path.abspath(outputFile)
    for file in inputFiles:
        if os.path.abspath(file) != outputPath:
            fullLog.extend(get_log(file))
    write_common_log(fullLog, outputFile)
if __name__ == "__main__":
    # Show the banner first, then pass the command-line arguments (without
    # the program name) on to main().
    banner = (
        "\tCaddy v2 JSON log to NCSA vHost log converter",
        "\tVersion {}; Copyright 2015-2020 (c) ATVG-Studios\n".format(version),
    )
    for banner_line in banner:
        print(banner_line)
    main(sys.argv[1:])