-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsecret_scanner.py
177 lines (161 loc) · 7.22 KB
/
secret_scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import glob
import os
import re
import sys
import argparse
parser = argparse.ArgumentParser(
prog='Secret Scanner',
usage='This tool is used for scanning project directory for secrets that should not be shared.'
'Run .exe or .py file in your terminal. Use -h/--help flag to get more information.',
description='The program scans the files of the current or specified (with -p/--path) directory for secrets '
'(password, secret key etc.) that have not been hidden (e.g. added to .env). '
'By default files added to .gitignore are NOT scanned. You can change it by using the -a/--all flag. '
'After scanning a summary of the results is given in "secret_scanner_results.txt" file '
'and in case secrets are found the detailed report is given in the same file'
'(generated in the scanned dir and added to .gitignore) '
'and in the terminal.',
epilog='The author is Elena Kolomeets (GitHub elena-kolomeets).',
)
parser.add_argument(
'-p', '--path',
type=str,
default='.',
required=False,
help='Provide the absolute or relative path to the directory that needs to be scanned for secrets. '
'By default the current directory is used.',
metavar='[path]',
)
parser.add_argument(
'-a', '--all',
required=False,
action='store_true',
help='Use this flag to scan all files, including the files added to .gitignore (not scanned by default)',
)
args = parser.parse_args()
def ignore(user_path):
"""
Checking if dir contains .gitignore
and returns a list of all ignored files from all subdirs.
:param user_path: the path given by the user with -p/--path parameter
:return: a list of files to ignore while scanning
"""
gitignore, ignore_list = [], []
if os.path.exists(user_path + '/.gitignore'):
with open(user_path + '/.gitignore') as g:
for line in g:
if line != '' and line != '\n':
if '\n' in line:
line = line[:-1]
if line[-1] == '/':
line = line + '*'
if line[:2] == '**':
line = line[2:]
gitignore.append(line)
for i in gitignore:
i = user_path + '/**/' + i + '*'
for j in glob.iglob(i, recursive=True):
ignore_list.append(os.path.relpath(j, start=user_path))
return ignore_list
def generate_words():
"""
Generating a list of words to search for while scanning for secrets.
:return: the list of secret words and expressions
"""
words_case, secret_words = [], []
for word in ['password', 'pass', 'key', 'token', 'credential', 'database_url', 'database-url', 'db_url', 'db-url']:
words_case.extend((word, word.upper(), word.capitalize()))
for word in words_case:
secret_words.extend((word+':', word+' :', word+'=', word+' =', '_'+word, word+'s', word+' is'))
return secret_words
def scan(userpath, scan_ignore, secret_words):
"""
Scanning files in the folder and subfolders for secrets
and adding file names and their lines with secrets to list of dictionaries
:param userpath:
:param scan_ignore: flag
:param secret_words: list of secret words to scan for
:return: file_list: list of dictionaries with the results
"""
file_list = []
for name in glob.iglob(userpath, recursive=True):
if os.path.isdir(name) or os.path.relpath(name, start=userpath)[6:] in scan_ignore \
or os.path.basename(name) == 'secret_scanner_results.txt':
continue
try:
for secret_word in secret_words:
if secret_word in os.path.basename(name):
file_list.append({'file_name': os.path.relpath(name, start=userpath)[6:]})
with open(name, mode='r', encoding='utf-8') as f:
for file_line in f:
for secret_word in secret_words:
if secret_word in file_line and \
re.search(r'([a-zA-Z]+[0-9]+|[0-9]+[a-zA-Z]+)[+?@^!§$%&]*\S{8,}', file_line):
file_list.append({'file_name': os.path.relpath(name, start=userpath)[6:],
'file_line': file_line.strip()})
break # go to next file if already found one secret in the line
except Exception:
# continue scanning if some files could not be open (like images or executables)
continue
return file_list
def write_output(user_path, file_list, output):
"""
Writing the scan output to the terminal and 'secret_scanner_results.txt',
adding the detailed report of the results to the file,
adding the file with results to .gitignore.
:param user_path:
:param file_list:
:param output:
:return: file secret_scanner_results.txt
"""
print(output, file=sys.stdout)
with open(user_path + '/' + 'secret_scanner_results.txt', mode='w') as f:
f.write(output)
if file_list:
for results in file_list:
f.write('\n')
for key, value in results.items():
f.write(key + ': ' + value+'\n')
with open(user_path + '/.gitignore', mode='a+') as g:
g.seek(0)
if '\nsecret_scanner_results.txt' not in g.read():
g.write('\nsecret_scanner_results.txt')
def main(user_path_raw, all_files):
"""
The program scans all files in all subdirectories of given directory
for exposed secrets locally using the algorithm developed by the author.
Files added to .gitignore can be scanned too (with -a/-all flag).
The results are written in the terminal and in "secret_scanner_results.txt".
:param user_path_raw:
:param all_files:
:return: executing all functions
"""
# normalize the given path
os.path.normpath(user_path_raw)
user_path = user_path_raw.replace(os.sep, '/')
if os.path.isdir(user_path):
file_list = []
# check the size of the folder to scan
if len(glob.glob(user_path+'/**/*', recursive=True)) <= 3000:
secret_words = generate_words()
# check if -a/--all flag is used (to include all .gitignore files)
if not all_files:
scan_ignore = ignore(user_path)
else:
scan_ignore = []
# scan the files for secrets
dot_file_list = scan(user_path+'/**/.*', scan_ignore, secret_words)
file_list = scan(user_path + '/**/*', scan_ignore, secret_words)
# merge lists with results
file_list.extend(dot_file_list)
# generate output values for different cases
if not file_list:
output = "No secrets found, good job! Keep an eye on them anyway as no tool is perfect."
else:
output = f"The scanner found {len(file_list)} possible secret exposure(s).\n\n"
else:
output = "The given directory is too large."
write_output(user_path, file_list, output)
else:
print("The given path is not found. Shall we try another one?", file=sys.stdout)
if __name__ == '__main__':
main(args.path, args.all)