Skip to content

Commit

Permalink
Prevent multiple SARIF inputs from the same tool (#742)
Browse files Browse the repository at this point in the history
  • Loading branch information
drdavella authored Jul 25, 2024
1 parent 8164574 commit c6a91dd
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/codemodder/codemodder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from codemodder.project_analysis.file_parsers.package_store import PackageStore
from codemodder.project_analysis.python_repo_manager import PythonRepoManager
from codemodder.result import ResultSet
from codemodder.sarifs import detect_sarif_tools
from codemodder.sarifs import DuplicateToolError, detect_sarif_tools
from codemodder.semgrep import run as run_semgrep


Expand Down Expand Up @@ -162,7 +162,7 @@ def run(original_args) -> int:
tool_result_files_map: DefaultDict[str, list[str]] = detect_sarif_tools(
[Path(name) for name in argv.sarif or []]
)
except FileNotFoundError as err:
except (DuplicateToolError, FileNotFoundError) as err:
logger.error(err)
return 1

Expand Down
11 changes: 11 additions & 0 deletions src/codemodder/sarifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ def detect(cls, run_data: dict) -> bool:
pass


class DuplicateToolError(ValueError): ...


def detect_sarif_tools(filenames: list[Path]) -> DefaultDict[str, list[str]]:
results: DefaultDict[str, list[str]] = defaultdict(list)

Expand All @@ -30,7 +33,15 @@ def detect_sarif_tools(filenames: list[Path]) -> DefaultDict[str, list[str]]:
try:
if det.detect(run):
logger.debug("detected %s sarif: %s", name, fname)
# According to the Codemodder spec, it is invalid to have multiple SARIF results for the same tool
# https://github.com/pixee/codemodder-specs/pull/36
if name in results:
raise DuplicateToolError(
f"duplicate tool sarif detected: {name}"
)
results[name].append(str(fname))
except DuplicateToolError as err:
raise err
except (KeyError, AttributeError, ValueError):
continue

Expand Down
47 changes: 45 additions & 2 deletions tests/test_sarif_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from codemodder.sarifs import detect_sarif_tools
from codemodder.sarifs import DuplicateToolError, detect_sarif_tools
from codemodder.semgrep import SemgrepResult, SemgrepResultSet


Expand Down Expand Up @@ -74,9 +74,52 @@ def test_codeql_sarif_input(self, tmpdir):
"--output",
tmpdir / "doesntmatter.txt",
"--codemod-include",
"secure-random",
"pixee:python/secure-random",
"--dry-run",
],
check=False,
)
assert completed_process.returncode == 0

def test_codeql_sarif_input_two_sarifs_same_tool(self, tmpdir):
tmpfile = Path(tmpdir) / "doesntmatter.sarif"
codeql_sarif = Path("tests/samples/webgoat_v8.2.0_codeql.sarif")
tmpfile.write_text(codeql_sarif.read_text())

completed_process = subprocess.run(
[
"codemodder",
"tests/samples/",
"--sarif",
f"{codeql_sarif},{tmpfile}",
"--output",
tmpdir / "doesntmatter.txt",
"--codemod-include",
"pixee:python/secure-random",
"--dry-run",
],
check=False,
capture_output=True,
)
assert completed_process.returncode == 1
assert (
"duplicate tool sarif detected: codeql" in completed_process.stderr.decode()
)

def test_two_sarifs_same_tool(self):
with pytest.raises(DuplicateToolError) as exc:
detect_sarif_tools([Path("tests/samples/webgoat_v8.2.0_codeql.sarif")] * 2)
assert "duplicate tool sarif detected: codeql" in str(exc.value)

def test_two_sarifs_different_tools(self):
results = detect_sarif_tools(
[
Path("tests/samples/webgoat_v8.2.0_codeql.sarif"),
Path("tests/samples/semgrep.sarif"),
]
)
assert len(results) == 2
assert "codeql" in results
assert "semgrep" in results
assert len(results["codeql"]) == 1
assert len(results["semgrep"]) == 1

0 comments on commit c6a91dd

Please sign in to comment.