From 828ed6a58e1f919dcdfe899495b5a84af31211aa Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Wed, 24 Jul 2024 14:58:32 -0400 Subject: [PATCH] Prevent multiple SARIF inputs from the same tool --- src/codemodder/codemodder.py | 4 +-- src/codemodder/sarifs.py | 11 ++++++++ tests/test_sarif_processing.py | 47 ++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 9f7e3317..51f7fa66 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -19,7 +19,7 @@ from codemodder.project_analysis.file_parsers.package_store import PackageStore from codemodder.project_analysis.python_repo_manager import PythonRepoManager from codemodder.result import ResultSet -from codemodder.sarifs import detect_sarif_tools +from codemodder.sarifs import DuplicateToolError, detect_sarif_tools from codemodder.semgrep import run as run_semgrep @@ -162,7 +162,7 @@ def run(original_args) -> int: tool_result_files_map: DefaultDict[str, list[str]] = detect_sarif_tools( [Path(name) for name in argv.sarif or []] ) - except FileNotFoundError as err: + except (DuplicateToolError, FileNotFoundError) as err: logger.error(err) return 1 diff --git a/src/codemodder/sarifs.py b/src/codemodder/sarifs.py index 29f2aea5..37f4261b 100644 --- a/src/codemodder/sarifs.py +++ b/src/codemodder/sarifs.py @@ -15,6 +15,9 @@ def detect(cls, run_data: dict) -> bool: pass +class DuplicateToolError(ValueError): ... + + def detect_sarif_tools(filenames: list[Path]) -> DefaultDict[str, list[str]]: results: DefaultDict[str, list[str]] = defaultdict(list) @@ -30,7 +33,15 @@ def detect_sarif_tools(filenames: list[Path]) -> DefaultDict[str, list[str]]: try: if det.detect(run): logger.debug("detected %s sarif: %s", name, fname) + # According to the Codemodder spec, it is invalid to have multiple SARIF results for the same tool + # https://github.com/pixee/codemodder-specs/pull/36 + if name in results: + raise DuplicateToolError( + f"duplicate tool sarif detected: {name}" + ) results[name].append(str(fname)) + except DuplicateToolError as err: + raise err except (KeyError, AttributeError, ValueError): continue diff --git a/tests/test_sarif_processing.py b/tests/test_sarif_processing.py index f3d2bdb4..4a055881 100644 --- a/tests/test_sarif_processing.py +++ b/tests/test_sarif_processing.py @@ -4,7 +4,7 @@ import pytest -from codemodder.sarifs import detect_sarif_tools +from codemodder.sarifs import DuplicateToolError, detect_sarif_tools from codemodder.semgrep import SemgrepResult, SemgrepResultSet @@ -74,9 +74,52 @@ def test_codeql_sarif_input(self, tmpdir): "--output", tmpdir / "doesntmatter.txt", "--codemod-include", - "secure-random", + "pixee:python/secure-random", "--dry-run", ], check=False, ) assert completed_process.returncode == 0 + + def test_codeql_sarif_input_two_sarifs_same_tool(self, tmpdir): + tmpfile = Path(tmpdir) / "doesntmatter.sarif" + codeql_sarif = Path("tests/samples/webgoat_v8.2.0_codeql.sarif") + tmpfile.write_text(codeql_sarif.read_text()) + + completed_process = subprocess.run( + [ + "codemodder", + "tests/samples/", + "--sarif", + f"{codeql_sarif},{tmpfile}", + "--output", + tmpdir / "doesntmatter.txt", + "--codemod-include", + "pixee:python/secure-random", + "--dry-run", + ], + check=False, + capture_output=True, + ) + assert completed_process.returncode == 1 + assert ( + "duplicate tool sarif detected: codeql" in completed_process.stderr.decode() + ) + + def test_two_sarifs_same_tool(self): + with pytest.raises(DuplicateToolError) as exc: + detect_sarif_tools([Path("tests/samples/webgoat_v8.2.0_codeql.sarif")] * 2) + assert "duplicate tool sarif detected: codeql" in str(exc.value) + + def test_two_sarifs_different_tools(self): + results = detect_sarif_tools( + [ + Path("tests/samples/webgoat_v8.2.0_codeql.sarif"), + Path("tests/samples/semgrep.sarif"), + ] + ) + assert len(results) == 2 + assert "codeql" in results + assert "semgrep" in results + assert len(results["codeql"]) == 1 + assert len(results["semgrep"]) == 1