Skip to content

Commit

Permalink
improved scanning (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
aerickson authored Aug 2, 2024
1 parent 13ebd90 commit 011b8f7
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 35 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ poetry version patch
- simple substring is used now, check for `resource "BLAH"` or `resource 'BLAH'`
- don't false trigger, see 'Known Issues' above
- add an option to show the list of authoritative resources checked for
- provide links to documentation when an authoritative resource is detected
## Relevant Links
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tf-authoritative-scanner"
version = "1.0.4"
version = "1.1.0"
description = ""
authors = ["Andrew Erickson <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion tf_authoritative_scanner/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.4"
__version__ = "1.1.0"
116 changes: 87 additions & 29 deletions tf_authoritative_scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,15 @@


class TFAuthoritativeScanner:
authoritative_resources = [
"google_folder_iam_binding",
"google_folder_iam_policy",
"google_organization_iam_binding",
"google_organization_iam_policy",
"google_project_iam_audit_config",
"google_project_iam_binding",
"google_project_iam_policy",
"google_storage_bucket_iam_binding",
"google_storage_bucket_iam_policy",
]

# less interesting / not verified authoritative resources
_additional_resources = [
"google_compute_instance",
"google_storage_bucket",
"google_sql_database_instance",
"google_vpc_network",
"google_compute_firewall",
"google_compute_subnetwork",
"google_folder_iam_member",
"google_organization_iam_member",
"google_container_cluster",
"google_pubsub_topic",
"google_cloud_run_service",
# hand-verified authoritative GCP resources that don't match the _binding or _policy suffixes
# TODO: figure out a way of extracting these from the provider's source code or docs
# - https://github.com/GoogleCloudPlatform/magic-modules/
# cd mmv1/third_party/terraform/website/docs/r
# rg -i authoritat | grep -vi 'non-authoritative'
additional_authoritative_gcp_resources = [
"google_project_iam_audit_config", # https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam
"google_storage_bucket_acl", # https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket_acl
"google_dns_record_set", # https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/dns_record_set
]

exception_comment_pattern = re.compile(r"#\s*terraform_authoritative_scanner_ok")
Expand All @@ -42,13 +26,57 @@ def __init__(self, include_dotdirs, verbosity=0):
self.include_dotdirs = include_dotdirs
self.verbosity = verbosity

# examples:
# "google_project_iam_audit_config", # https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam
# "google_folder_iam_binding", # https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_folder_iam
def build_gcp_resource_doc_url_from_name(self, resource_name):
    """Build a Terraform Registry documentation URL from a GCP resource name.

    A leading ``google_`` and a single trailing ``_binding`` or ``_policy``
    are stripped from ``resource_name``; what remains is appended to the
    registry's ``.../docs/resources/google_`` path.

    Examples:
        google_project_iam_binding -> .../docs/resources/google_project_iam
        google_folder_iam_policy   -> .../docs/resources/google_folder_iam

    Args:
        resource_name: Terraform resource type name, e.g.
            ``"google_project_iam_binding"``.

    Returns:
        The documentation URL as a string.
    """
    # TODO: handle non _binding or _policy ARs
    doc_name = resource_name

    # Strip only a *trailing* suffix. str.replace() would also delete an
    # interior match, e.g. the "_policy" inside
    # "google_access_context_manager_access_policy_iam_binding".
    for suffix in ("_binding", "_policy"):
        if doc_name.endswith(suffix):
            doc_name = doc_name[: -len(suffix)]
            break

    # Likewise strip only a *leading* "google_", not every occurrence.
    prefix = "google_"
    if doc_name.startswith(prefix):
        doc_name = doc_name[len(prefix):]

    return f"https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_{doc_name}"

# from inspecting the GCP provider, basically anything with the '_policy' or '_binding'
# in the resource name is authoritative aka 'google*policy' or 'google*binding'.
# - see the GCP provider's docs
# https://github.com/GoogleCloudPlatform/magic-modules/blob/19bec78daccb664b42f915e1fc552dea6a64ea93/mmv1/templates/terraform/resource_iam.html.markdown.tmpl#L59-L60
def is_gcp_resource_name_authoritative(self, resource_name):
    """Classify a GCP resource type name as authoritative or not.

    Args:
        resource_name: Terraform resource type name to classify.

    Returns:
        A dict with two keys: ``"authoritative"`` (bool) and
        ``"confidence"`` (int).
    """
    # Names on the hand-maintained list win outright.
    if resource_name in self.additional_authoritative_gcp_resources:
        return {"authoritative": True, "confidence": 100}

    # Every remaining rule only applies to google_* resource types.
    if not resource_name.startswith("google_"):
        return {"authoritative": False, "confidence": 90}

    # Pattern rule: google_*_binding / google_*_policy.
    if resource_name.endswith(("_binding", "_policy")):
        return {"authoritative": True, "confidence": 85}

    # Pattern rule: google_*_audit_config.
    if resource_name.endswith("_audit_config"):
        return {"authoritative": True, "confidence": 80}

    return {"authoritative": False, "confidence": 90}

# improvements over earlier substring-based approach:
# - check word parts vs substring
# - use patterns vs hardcoded list
def authoritative_resource_in_line(self, line):
    """Report whether ``line`` declares an authoritative GCP resource.

    Only the first two whitespace-separated parts of the line are examined:
    the first must be ``resource`` and the second is classified by
    ``is_gcp_resource_name_authoritative``.

    Args:
        line: A single line of Terraform source.

    Returns:
        A dict with keys ``"authoritative"`` (bool) and ``"confidence"`` (int).
    """
    keyword, resource_type = _get_first_two_word_parts(line)

    # Not a resource declaration: nothing to classify.
    if keyword != "resource":
        return {"authoritative": False, "confidence": 100}

    verdict = self.is_gcp_resource_name_authoritative(resource_type)
    if verdict["authoritative"]:
        return {"authoritative": True, "confidence": verdict["confidence"]}
    return {"authoritative": False, "confidence": verdict["confidence"]}

def check_file_for_authoritative_resources(self, file_path):
with open(file_path, "r") as file:
lines = file.readlines()

authoritative_lines = []
excepted_lines = []
authoritative = False
file_authoritative = False
previous_line = ""
for line_number, line in enumerate(lines, start=1):
stripped_line = line.strip()
Expand All @@ -57,19 +85,22 @@ def check_file_for_authoritative_resources(self, file_path):
previous_line = stripped_line
continue
# Check if the line contains any authoritative resource and is not excepted
if any(resource in line for resource in self.authoritative_resources):
r = self.authoritative_resource_in_line(stripped_line)
r_authoritative = r["authoritative"]
_r_confidence = r["confidence"]
if r_authoritative:
if not self.exception_comment_pattern.search(line) and not self.exception_comment_pattern.search(
previous_line
):
authoritative_lines.append({"line_number": line_number, "line": stripped_line})
authoritative = True
file_authoritative = True
else:
excepted_lines.append({"line_number": line_number, "line": stripped_line})
previous_line = stripped_line

return {
"file_path": file_path,
"authoritative": authoritative,
"authoritative": file_authoritative,
"authoritative_lines": authoritative_lines,
"excepted_lines": excepted_lines,
}
Expand Down Expand Up @@ -135,6 +166,9 @@ def run(self, paths):
sys.exit(0)


# TODO: move these to util files


def _verify_paths(paths):
for path in paths:
if not os.path.exists(path):
Expand All @@ -157,6 +191,30 @@ def _get_version(rel_path):
raise RuntimeError("Unable to find version string.")


def _remove_inner_quotes(s):
# Define patterns for both single and double quotes
double_quote_pattern = r"\"([^\"]*?)\""
single_quote_pattern = r"\'([^\']*?)\'"

# Remove inner quotes for double quotes
s = re.sub(double_quote_pattern, lambda m: m.group(0).replace('"', ""), s)
# Remove inner quotes for single quotes
s = re.sub(single_quote_pattern, lambda m: m.group(0).replace("'", ""), s)

return s


# known issue: returns "", "" on less than two word-part strings
def _get_first_two_word_parts(string):
    """Return the first two whitespace-separated parts of ``string``, unquoted.

    Each part is passed through ``_remove_inner_quotes``. When ``string``
    has fewer than two parts, ``("", "")`` is returned instead.
    """
    tokens = string.split()
    if len(tokens) < 2:
        return "", ""
    head, second = tokens[0], tokens[1]
    return _remove_inner_quotes(head), _remove_inner_quotes(second)


# TODO: move this to a cli.py file
def main():
parser = argparse.ArgumentParser(description="Static analysis of Terraform files for authoritative GCP resources.")
parser.add_argument("paths", metavar="path", type=str, nargs="+", help="File or directory to scan")
Expand Down
44 changes: 40 additions & 4 deletions tf_authoritative_scanner/scanner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,10 @@ def test_check_directory_ok(self, scanner, temp_non_authoritative_tf_file):
assert len(r["results"][0]["authoritative_lines"]) == 0
assert len(r["results"][0]["excepted_lines"]) == 0

def test_check_known_issue(self, scanner, temp_tf_file_authoritative_resource_name_but_not_resource):
# see 'Known Issues' in README.md
# - ideally this would be 0 authoritative lines and 1 excepted line
def test_check_ar_in_name(self, scanner, temp_tf_file_authoritative_resource_name_but_not_resource):
r = scanner.check_paths_for_authoritative_resources([temp_tf_file_authoritative_resource_name_but_not_resource])
assert r["files_scanned"] == 1
assert len(r["results"][0]["authoritative_lines"]) == 1
assert len(r["results"][0]["authoritative_lines"]) == 0
assert len(r["results"][0]["excepted_lines"]) == 0

def test_check_exclude_comment_inline(self, scanner, temp_tf_file_with_exception_same_line):
Expand Down Expand Up @@ -204,3 +202,41 @@ def test_main_directory_exception(self, temp_tf_file_with_exception_same_line):
result = subprocess.run(["tfas", "-v", temp_tf_file_with_exception_same_line], capture_output=True, text=True)
assert result.stderr == ""
assert result.returncode == 0

# tests for authoritative_resource_in_line

def test_authoritative_resource_in_line_basic(self, scanner):
    """A *_binding resource line is flagged; an unrecognised google_ type is not."""
    flagged = scanner.authoritative_resource_in_line('resource "google_project_iam_binding" "binding" {')
    assert flagged["authoritative"]

    ignored = scanner.authoritative_resource_in_line('resource "google_project_iam_funtime" "binding" {')
    assert not ignored["authoritative"]

def test_authoritative_resource_in_line_complex(self, scanner):
    """An authoritative name outside the resource-type position must not be flagged."""
    # AR name appears only in the resource's local name, not in its type
    in_local_name = scanner.authoritative_resource_in_line(
        'resource "google_project_iam_funtime" "a_google_project_iam_binding_test" {'
    )
    assert not in_local_name["authoritative"]

    # AR name appears only inside a string value on a non-resource line
    in_string_value = scanner.authoritative_resource_in_line('a = "google_project_iam_binding"')
    assert not in_string_value["authoritative"]

# tests for build_gcp_resource_doc_url_from_name

def test_build_gcp_resource_doc_url_from_name(self, scanner):
    """A *_binding resource name maps to its suffix-less registry docs URL."""
    expected_url = "https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/google_project_iam"
    actual_url = scanner.build_gcp_resource_doc_url_from_name("google_project_iam_binding")
    assert actual_url == expected_url

# test is_gcp_resource_name_authoritative

def test_is_gcp_resource_name_authoritative(self, scanner):
    """Pattern-matched and hand-listed resource names are both reported authoritative."""
    # Index the result: the method returns a dict, and a non-empty dict is
    # truthy even when its "authoritative" value is False, so asserting on
    # the dict itself could never fail.
    assert scanner.is_gcp_resource_name_authoritative("google_project_iam_binding")["authoritative"]
    assert scanner.is_gcp_resource_name_authoritative("google_project_iam_audit_config")["authoritative"]

def test_is_gcp_resource_name_authoritative_complex(self, scanner):
    """*_audit_config names are authoritative whether hand-listed or pattern-matched."""
    # The method returns a dict, which is always truthy when non-empty;
    # assert on its "authoritative" value so the test can actually fail.

    # google_project_iam_audit_config, known
    assert scanner.is_gcp_resource_name_authoritative("google_project_iam_audit_config")["authoritative"]
    # pattern based, unknown
    assert scanner.is_gcp_resource_name_authoritative("google_silly_future_audit_config")["authoritative"]

0 comments on commit 011b8f7

Please sign in to comment.