Skip to content

Commit

Permalink
great out of the box
Browse files Browse the repository at this point in the history
  • Loading branch information
whitespots committed Dec 11, 2021
1 parent e12a687 commit b3427bc
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 22 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.idea/
.installed.cfg
*.egg
MANIFEST
Expand Down Expand Up @@ -83,7 +84,8 @@ ipython_config.py

# pyenv
.python-version

tests.py
test*
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ This scanner will help you identify such repositories and hide sensitive data t
## How to run

1. Generate your `read_api` token
![token-generation](/token-generation.jpeg?raw=true "generation")
![token-generation](/images/token-generation.jpeg?raw=true "generation")
2. Run the following

```
Expand Down
File renamed without changes
4 changes: 3 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ def scan_logic():
jobs = gitlab_scanner.get_all_pipeline_jobs(project_id, pipeline_id)
traces = gitlab_scanner.get_all_job_traces(project_id, jobs)
for trace in traces:
result = gitlab_scanner.find_sensitive_data(trace)
result = gitlab_scanner.find_sensitive_data(trace.get('trace'))
job_id = trace.get('job_id')
if result is not None and not gitlab_scanner.detect_secret_false_positive(result):
finding = {
'project_id': project_id,
'project_path_with_namespace': project_path_with_namespace,
'pipeline_id': pipeline_id,
'link': f'{gitlab_hostname}/{project_path_with_namespace}/-/jobs/{job_id}',
'issue': f'Unmasked secret value: {result}'
}
print(finding)
Expand Down
3 changes: 2 additions & 1 deletion patterns.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"variable": [
"^\\$\\w+",
"^\\${\\w+",
"=$"
"=$",
"\\$"
],
"masked": [
"MASKED"
Expand Down
57 changes: 39 additions & 18 deletions scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class GitlabScanner:
def __init__( self, gitlab_hostname, gitlab_private_token,check_for_false_positives):
def __init__(self, gitlab_hostname, gitlab_private_token, check_for_false_positives):
self.gitlab_hostname = gitlab_hostname
self.gitlab_private_token = gitlab_private_token
self.check_for_false_positives = check_for_false_positives
Expand All @@ -17,17 +17,17 @@ def get_all_groups(self):
print('Getting all groups')
# Get pages count
response = requests.get(
url=f'{self.gitlab_hostname}/api/v4/groups?all_available=true&page=1&per_page=50',
url=f'{self.gitlab_hostname}/api/v4/groups?all_available=true&page=1&per_page=5',
headers={
'PRIVATE-TOKEN': self.gitlab_private_token
}
)
total_pages = int(response.headers.get('x-total-pages'))
distinct_id_list = list()
# Getting data from all groups
for page_index in range(1, total_pages+1):
for page_index in range(1, total_pages + 1):
response = requests.get(
url=f'{self.gitlab_hostname}/api/v4/groups?all_available=true&page={page_index}&per_page=50',
url=f'{self.gitlab_hostname}/api/v4/groups?all_available=true&page={page_index}&per_page=80',
headers={
'PRIVATE-TOKEN': self.gitlab_private_token
}
Expand All @@ -50,7 +50,7 @@ def get_all_projects(self, group_id_list):
for group_id in group_id_list:
# Get pages count
response = requests.get(
url=f'{self.gitlab_hostname}/api/v4/groups/{group_id}/projects?include_subgroups=true&page=1&per_page=50',
url=f'{self.gitlab_hostname}/api/v4/groups/{group_id}/projects?include_subgroups=true&page=1&per_page=5',
headers={
'PRIVATE-TOKEN': self.gitlab_private_token
}
Expand All @@ -62,7 +62,7 @@ def get_all_projects(self, group_id_list):
print(f'[{group_id_list.index(group_id)}/{len(group_id_list)}] '
f'Getting projects for group id {group_id}. Page {page_index}/{total_pages}...')
response = requests.get(
url=f'{self.gitlab_hostname}/api/v4/groups/{group_id}/projects?include_subgroups=true&page={page_index}&per_page=50',
url=f'{self.gitlab_hostname}/api/v4/groups/{group_id}/projects?include_subgroups=true&page={page_index}&per_page=80',
headers={
'PRIVATE-TOKEN': self.gitlab_private_token
}
Expand Down Expand Up @@ -137,7 +137,10 @@ def get_all_job_traces(self, project_id, job_id_list):
'PRIVATE-TOKEN': self.gitlab_private_token
}
).text
traces.append(response)
traces.append({
'job_id': job_id,
'trace': response
})
return traces

def find_sensitive_data(self, trace):
Expand All @@ -148,23 +151,41 @@ def find_sensitive_data(self, trace):
)
result = pattern.findall(trace)
if result is not None and len(result) > 0:
if any([self.detect_secret_false_positive(str(match)) for match in result]):
if any([self.detect_secret_false_positive(match) for match in result]):
return None
return result
return None

def detect_secret_false_positive(self, match):
    """Decide whether a regex hit is a false positive rather than a real secret.

    Args:
        match: What the secret regex produced. Callers in this project pass
            either one match (a string, or a tuple of captured groups) or
            the whole list of matches returned by ``findall``.

    Returns:
        ``False`` when false-positive checking is disabled on this scanner.
        ``True`` when ``match`` is empty, or when any examined string starts
        with ``$`` or matches one of the configured false-positive patterns
        (case-insensitive). ``False`` otherwise.
    """
    print(f'Checking {match} is false positive or not')
    if not self.check_for_false_positives:
        return False
    if len(match) == 0:
        return True

    # A lone string is a single finding. Iterating it directly would test it
    # one character at a time, so a stray '$' or '=' anywhere inside a real
    # secret would wrongly mark it as a false positive.
    if isinstance(match, str):
        match = [match]

    # NOTE(review): load_config() is assumed to return the parsed patterns
    # file, with 'false_positive' mapping rule names to lists of regexes --
    # confirm against its definition.
    rules = load_config().get('false_positive')

    def looks_harmless(text):
        # A leading '$' means an unexpanded variable reference, not a value.
        if text.startswith('$'):
            return True
        for rule in rules:
            pattern_list = rules.get(rule)
            # Only list-valued rules are applied; anything else is ignored.
            if not isinstance(pattern_list, list):
                continue
            for pattern in pattern_list:
                if re.search(pattern, str(text), re.IGNORECASE):
                    return True
        return False

    for finding in match:
        if isinstance(finding, tuple):
            # Multi-group match: one harmless group is enough.
            if any(looks_harmless(part) for part in finding):
                return True
        elif isinstance(finding, str):
            if looks_harmless(finding):
                return True
    return False
28 changes: 28 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from scanner import GitlabScanner
import os

# Manual smoke test: fetch one job trace and run the secret scanner over it.

# Common settings
# URL and user token are both read from the environment, as the message below
# instructs, so that no token ever has to be written into this file.
gitlab_private_token = os.environ.get('gitlab_private_token', '')
gitlab_hostname = os.environ.get('gitlab_hostname', '')

if not gitlab_private_token or not gitlab_hostname:
    print('You must set gitlab_private_token and gitlab_hostname as environment variables')
    raise SystemExit(1)

# How many pipelines should be checked per repository
# Set 0 for unlimited count
pipeline_count_to_check = int(os.environ.get('pipeline_count_to_check', 1))
# bool() of any non-empty string is True, so values such as "false" or "0"
# have to be recognised explicitly; an unset variable keeps the default (on).
check_for_false_positives = os.environ.get(
    'check_for_false_positives', 'true'
).strip().lower() not in ('', '0', 'false', 'no', 'off')

gitlab_scanner = GitlabScanner(
    gitlab_hostname=gitlab_hostname,
    gitlab_private_token=gitlab_private_token,
    check_for_false_positives=check_for_false_positives
)

print(f'First request to {gitlab_hostname}')

# Placeholder project id and job id -- replace with real ones before running.
trace = gitlab_scanner.get_all_job_traces(111, [1111111])
# get_all_job_traces returns dicts with 'job_id' and 'trace'; only the trace
# text is scanned, not the string form of the whole dict.
result = gitlab_scanner.find_sensitive_data(trace[0].get('trace'))
print(result)

0 comments on commit b3427bc

Please sign in to comment.