Skip to content

Commit

Permalink
Expand Python Snapshots (#1473)
Browse files: browse the repository at this point in the history
* Enhance arguments, don't independently build by default.

Signed-off-by: Caroline Russell <[email protected]>

* Prepend env variables to cdxgen command.

Signed-off-by: Caroline Russell <[email protected]>

* Add python repos.

Signed-off-by: Caroline Russell <[email protected]>

* Correction.

Signed-off-by: Caroline Russell <[email protected]>

* Correction.

Signed-off-by: Caroline Russell <[email protected]>

* Expand multi-version repos correctly in tests.

Signed-off-by: Caroline Russell <[email protected]>

* Eliminate redundant read_csv.

Signed-off-by: Caroline Russell <[email protected]>

* Check file exists before converting to 1.6.

Signed-off-by: Caroline Russell <[email protected]>

* Suppress git detached head messages.

Signed-off-by: Caroline Russell <[email protected]>

* Run cdxgen in debug mode.

Signed-off-by: Caroline Russell <[email protected]>

* Set VIRTUAL_ENV for Python.

Signed-off-by: Caroline Russell <[email protected]>

* Remove src_repos.

Signed-off-by: Caroline Russell <[email protected]>

* Set VIRTUAL_ENV for Python.

Signed-off-by: Caroline Russell <[email protected]>

* Make cdxgen use setup.py for funcy.

Signed-off-by: Caroline Russell <[email protected]>

* Add requests.

Signed-off-by: Caroline Russell <[email protected]>

* Replace numba with wrapt.

Signed-off-by: Caroline Russell <[email protected]>

* Replace django-goat with genforce.

Signed-off-by: Caroline Russell <[email protected]>

* Replace wrapt with numpy.

Signed-off-by: Caroline Russell <[email protected]>

* Handle python with native c.

Signed-off-by: Caroline Russell <[email protected]>

* Don't build numpy before invoking.

Signed-off-by: Caroline Russell <[email protected]>

* Ensure snapshot directories are removed.

Signed-off-by: Caroline Russell <[email protected]>

---------

Signed-off-by: Caroline Russell <[email protected]>
  • Loading branch information
cerrussell authored Nov 29, 2024
1 parent 8a2a602 commit 5abfcd5
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 76 deletions.
1 change: 1 addition & 0 deletions .github/workflows/snapshot-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
env:
SHELL: bash
run: |
rm -rf original_snapshots new_snapshots src_repos
cdxgen_tarball=$(pnpm pack | tail -1)
npm install -g "$cdxgen_tarball"
git clone https://github.com/appthreat/cdxgen-samples.git original_snapshots
Expand Down
18 changes: 5 additions & 13 deletions test/diff/diff_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from custom_json_diff.lib.custom_diff_classes import Options
from custom_json_diff.lib.utils import json_dump,json_load

from generate import filter_repos
from generate import filter_repos, read_csv

logging.disable(logging.INFO)

Expand Down Expand Up @@ -51,6 +51,8 @@ def build_args():
def compare_snapshot(dir1: str, dir2: str, options: Options, repo: Dict, migrate_legacy: bool):
bom_1 = f"{dir1}/{repo['project']}-bom.json"
bom_2 = f"{dir2}/{repo['project']}-bom.json"
if not (b1 := os.path.exists(bom_1)) or not os.path.exists(bom_2):
return 1, "" if b1 else f"{bom_1} not found.", "" if os.path.exists(bom_2) else f"{bom_2} not found."
if migrate_legacy:
bom_data = migrate_to_1_6(bom_1)
bom_1 = bom_1.replace("bom.json", "bom.migrated.json")
Expand All @@ -61,8 +63,6 @@ def compare_snapshot(dir1: str, dir2: str, options: Options, repo: Dict, migrate
options.file_1 = bom_1
options.file_2 = bom_2
options.output = f'{dir2}/{repo["project"]}-diff.json'
if not (b1 := os.path.exists(bom_1)) or not os.path.exists(bom_2):
return 1, "" if b1 else f"{bom_1} not found.", "" if os.path.exists(bom_2) else f"{bom_2} not found."
status, j1, j2 = compare_dicts(options)
if status:
status, result_summary = perform_bom_diff(j1, j2)
Expand All @@ -72,15 +72,15 @@ def compare_snapshot(dir1: str, dir2: str, options: Options, repo: Dict, migrate


def perform_snapshot_tests(dir1: str, dir2: str, projects: List, project_types: Set, migrate_legacy: bool, skipped_projects):
repo_data = read_csv(projects, project_types, skipped_projects)
csv_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "repos.csv")
repo_data = read_csv(csv_file, projects, project_types, skipped_projects)
options = Options(
allow_new_versions=True,
allow_new_data=True,
preconfig_type="bom",
include=["properties", "evidence", "licenses"],
exclude=["annotations"]
)

failed_diffs = {}
for repo in repo_data:
status, result, summary = compare_snapshot(dir1, dir2, options, repo, migrate_legacy)
Expand Down Expand Up @@ -114,14 +114,6 @@ def migrate_to_1_6(bom_file):
return bom_data


def read_csv(projects, project_types, skipped_projects):
csv_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "repos.csv")
with open(csv_file, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
repo_data = list(reader)
return filter_repos(repo_data, projects, project_types, skipped_projects)


if __name__ == '__main__':
args = build_args()
if args.project_types:
Expand Down
125 changes: 75 additions & 50 deletions test/diff/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,27 @@ def build_args():
parser.add_argument(
'--skip-clone',
action='store_false',
dest='skip_clone',
default=True,
help='Skip cloning the repositories (must be used with the --repo-dir argument)'
dest='clone',
help='Skip cloning the repositories.)'
)
parser.add_argument(
'--debug-cmds',
action='store_true',
dest='debug_cmds',
help='For use in workflow'
help='Prints the commands out in the console.'
)
parser.add_argument(
'--skip-build',
'--build',
action='store_true',
dest='skip_build',
default=False,
help='Skip building the samples and just run cdxgen. Should be used with --skip-clone'
help='Build the samples before invoking cdxgen.'
)
# --skip-prebuild turns the pre-build step OFF: store_false flips the default
# of True, so args.prebuild is False only when the flag is given.
parser.add_argument(
    '--skip-prebuild',
    action='store_false',
    default=True,
    dest='prebuild',
    help='Skip running the pre-build commands before invoking cdxgen.'
)
parser.add_argument(
'--skip-projects',
Expand All @@ -79,6 +84,13 @@ def build_args():
help='Location to activate sdkman.',
default='~/.sdkman/bin/sdkman-init.sh'
)
# The long flag and its short alias must be separate arguments: adjacent
# string literals without a comma are concatenated by Python into the single
# option '--uv-location-uv', which leaves '-uv' unregistered.
parser.add_argument(
    '--uv-location',
    '-uv',
    help='Location of uv Python installations.',
    default='~/.local/share/uv/python',
    dest='uv_location'
)
return parser.parse_args()


Expand Down Expand Up @@ -128,7 +140,7 @@ def checkout_commit(commit):
Returns:
str: The command to check out the commit.
"""
checkout_cmd = ['git', 'checkout', commit]
checkout_cmd = ['git', '-c', 'advice.detachedHead=false', 'checkout', commit]
return list2cmdline(checkout_cmd)


Expand All @@ -151,64 +163,56 @@ def clone_repo(url, repo_dir):
return list2cmdline(clone_cmd)


def create_python_venvs(repo_data):
def create_python_venvs(repo, uv_location):
"""
Sets the Python version for each Python repository
Args:
repo_data (list[dict]): Contains the sample repository data
repo (dict): Contains the sample repository data
Returns:
list[dict]: The updated repository data
cmd (str): The command that creates the Python virtual environment and then runs the repository's build command
"""
for r in repo_data:
if r["language"] == "python":
if r["package_manager"] == "poetry":
r["build_cmd"] = f"poetry env use python{r['language_range']} && {r['build_cmd']}"
else:
r["build_cmd"] = f"python{r['language_range']} -m venv .venv; source .venv/bin/activate && {r['build_cmd']}"
return repo_data
vers = repo["language_range"].split(".")
py_cmd = f"{uv_location}/cpython-{repo['language_range']}-linux-x86_64-gnu/bin/python{vers[0]}.{vers[1]}"
if repo["package_manager"] == "poetry":
return f"poetry env use {py_cmd} && {repo['build_cmd']}"
else:
return f"{py_cmd} -m venv .venv;source .venv/bin/activate && {repo['build_cmd']}"


def exec_on_repo(clone, output_dir, skip_build, repo):
def exec_on_repo(args, repo):
"""
Determines a sequence of commands on a repository.
Args:
clone (bool): Indicates whether to clone the repository.
output_dir (pathlib.Path): The directory to output the slices.
skip_build (bool): Indicates whether to skip the build phase.
args (argparse.Namespace): The parsed arguments
repo (dict): The repository information.
Returns:
str: The sequence of commands to be executed.
"""
commands = []
if clone:
if args.clone:
commands.append(f'{clone_repo(repo["link"], repo["repo_dir"])}')
commands.append(f'{list2cmdline(["cd", repo["repo_dir"]])}')
commands.append(f'{checkout_commit(repo["commit"])}')
if not skip_build and repo["pre_build_cmd"]:
if args.prebuild and repo["pre_build_cmd"]:
cmds = repo["pre_build_cmd"].split(';')
cmds = [cmd.lstrip().rstrip() for cmd in cmds]
for cmd in cmds:
new_cmd = list(cmd.split(" "))
commands.append(f"{list2cmdline(new_cmd)}")
if not skip_build and repo["build_cmd"]:
if "python" in repo["language"]:
commands.append(create_python_venvs(repo, args.uv_location))
elif args.build and repo["build_cmd"]:
cmds = repo["build_cmd"].split(";")
cmds = [cmd.lstrip().rstrip() for cmd in cmds]
for cmd in cmds:
new_cmd = list(cmd.split(" "))
# if repo["language"] == "dotnet":
# new_cmd.extend(["-r", f"{repo['language_range']}"])
commands.append(f"{list2cmdline(new_cmd)}")
# if repo["language"] == "python":
# if repo["package_manager"] == "pip":
# cdxgen_cmd = f"source .venv/bin/activate && {cdxgen_cmd}"
# else:
# cdxgen_cmd = f"poetry env use {repo['language_range']} && {cdxgen_cmd}"
commands.append(run_cdxgen(repo, output_dir))
commands.append(run_cdxgen(repo, args.output_dir))
commands = "\n".join(commands)
return commands

Expand All @@ -234,7 +238,7 @@ def expand_multi_versions(repo_data):
new_data.append(new_repo)
else:
new_data.append(r)
return create_python_venvs(new_data)
return new_data


def filter_repos(repo_data, projects, project_types, skipped_projects):
Expand Down Expand Up @@ -270,19 +274,19 @@ def generate(args):
project_types = {args.project_types}

repo_data = read_csv(args.repo_csv, args.projects, project_types, args.skip_projects)
processed_repos = add_repo_dirs(args.clone_dir, expand_multi_versions(repo_data))
processed_repos = add_repo_dirs(args.clone_dir, repo_data)

if not args.debug_cmds:
check_dirs(args.skip_clone, args.clone_dir, args.output_dir)
check_dirs(args.clone, args.clone_dir, args.output_dir)

if not args.skip_build:
if args.prebuild:
run_pre_builds(repo_data, args.output_dir, args.debug_cmds, args.sdkman_sh)

commands = ""
cdxgen_log = args.output_dir.joinpath("generate.log")
for repo in processed_repos:
commands += f"\necho {repo['project']} started: $(date) >> {cdxgen_log}\n"
commands += exec_on_repo(args.skip_clone, args.output_dir, args.skip_build, repo)
commands += exec_on_repo(args, repo)
commands += f"\necho {repo['project']} finished: $(date) >> {cdxgen_log}\n\n"

commands = "".join(commands)
Expand Down Expand Up @@ -384,7 +388,7 @@ def read_csv(csv_file, projects, project_types, skipped_projects):
Reads a CSV file and filters the data based on a list of languages.
Parameters:
csv_file (pathlib.Path): The path to the CSV file.
csv_file (pathlib.Path| str): The path to the CSV file.
projects (list): A list of projects names to filter on.
project_types (set): A set of project types to filter on.
Returns:
Expand All @@ -393,7 +397,7 @@ def read_csv(csv_file, projects, project_types, skipped_projects):
with open(csv_file, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
repo_data = list(reader)
return filter_repos(repo_data, projects, project_types, skipped_projects)
return expand_multi_versions(filter_repos(repo_data, projects, project_types, skipped_projects))


def run_cdxgen(repo, output_dir):
Expand All @@ -407,16 +411,37 @@ def run_cdxgen(repo, output_dir):
Returns:
str: The repository data with cdxgen commands
"""
cdxgen_cmd = [
'cdxgen',
"--no-include-formulation",
'-t',
repo['language'],
'-o',
Path.joinpath(output_dir, f'{repo["project"]}-bom.json'),
repo['repo_dir']
]
return list2cmdline(cdxgen_cmd)
if repo["language"] == "python-c":
cdxgen_cmd = [
"cdxgen",
"--no-include-formulation",
"-t",
"python",
"-t",
"c",
"-o",
Path.joinpath(output_dir, f"{repo['project']}-bom.json"),
repo["repo_dir"]
]
else:
cdxgen_cmd = [
"cdxgen",
"--no-include-formulation",
"-t",
repo['language'],
"-o",
Path.joinpath(output_dir, f"{repo['project']}-bom.json"),
repo["repo_dir"]
]
cmd = f"CDXGEN_DEBUG_MODE=debug {list2cmdline(cdxgen_cmd)}"
if repo["cdxgen_vars"]:
cmd = f"{repo['cdxgen_vars']} {cmd}"
if "python" in repo["language"]:
if repo["package_manager"] == "poetry":
cmd = f"VIRTUAL_ENV=$(poetry env list --full-path | grep -E -o '(/\S+)+/pypoetry/virtualenvs/\S+') {cmd}"
else:
cmd = f"VIRTUAL_ENV={repo['repo_dir']}/.venv {cmd}"
return cmd


def run_pre_builds(repo_data, output_dir, debug_cmds, sdkman_sh):
Expand Down
28 changes: 15 additions & 13 deletions test/diff/repos.csv
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
project,link,language,language_range,config_files,package_manager,pm_version,pre_build_cmd,build_cmd,commit
django-goat,https://github.com/red-and-black/DjangoGoat.git,python,,,,,,python -m venv venv; source venv/bin/activate && pip install -r requirements_app.txt,5e6aaa6d0497bf24abd179304e6ca51295a8091d
java-sec-code,https://github.com/JoyChou93/java-sec-code.git,java8,8,,,,sdk use java 8.0.432-tem,mvn -B clean compile -DskipTests=true,457d703e8f89bff657c6c51151ada71ebd09a1c6
rasa,https://github.com/RasaHQ/rasa.git,python,,,,,pipx install poetry,,7807b19ad5fffab73ca1a04dc710f812115a9288
restic,https://github.com/restic/restic.git,go,,,,,,go run build.go,3786536dc18ef27aedcfa8e4c6953b48353eee79
syncthing,https://github.com/syncthing/syncthing.git,go,,,,,,go run build.go,ba6ac2f604eb1cd27764460b687537c5e40aaaf8
github-readme-stats,https://github.com/anuraghazra/github-readme-stats.git,javascript,>=18,package-lock.json,npm,,,npm install .,9a0d9ae2c17e007cbb8e9f32654941e1f0a8268e
prettier,https://github.com/prettier/prettier.git,javascript,>=18,"package.json,yarn.lock",yarn,4.5.0,,corepack enable yarn;yarn install,9cf9079f75a30f1088529e0cae6296aeb71205ba
astro,https://github.com/withastro/astro.git,javascript,>=18.17.1,"package.json,pnpm-lock.yaml",pnpm,9.12.1,,pnpm install,9d6bcdb88fcb9df0c5c70e2b591bcf962ce55f63
plantuml,https://github.com/plantuml/plantuml.git,java8,8,,gradle,,sdk use java 8.0.432-tem,gradle clean build -x javaDoc -PjavacRelease=8,8eb791f39478778788fd47a9195dc1b2feb3eade
plantuml_17,https://github.com/plantuml/plantuml.git,java17,17,,gradle,,,gradle clean build -x javaDoc -PjavacRelease=17,8eb791f39478778788fd47a9195dc1b2feb3eade
jazzer,https://github.com/CodeIntelligenceTesting/jazzer.git,java8,8,,bazelisk,7.3.0,sdk use java 8.0.432-tem;export BAZEL_CMD='bazelisk';export BAZEL_TARGET='//:jazzer_release',bazelisk build //:jazzer_release,3947707d7db7e5cae0c8cfaeb10bdfeb06fc32bb
tinydb,https://github.com/msiemens/tinydb.git,python,,,,,,python -m venv venv; source venv/bin/activate && pip install .,3dc6a952ef8700706909bf60a1b15cf21af47608
project,link,language,language_range,config_files,package_manager,pm_version,pre_build_cmd,build_cmd,cdxgen_vars,commit
genforce,https://github.com/genforce/genforce.git,python,"3.9.20,3.10.15",requirements.txt,pip,,,pip install -r requirements.txt,,197feee82101b78266521c8470648bbb9b7f31f4
java-sec-code,https://github.com/JoyChou93/java-sec-code.git,java8,8,,,,sdk use java 8.0.432-tem,mvn -B clean compile -DskipTests=true,,457d703e8f89bff657c6c51151ada71ebd09a1c6
restic,https://github.com/restic/restic.git,go,,,,,,go run build.go,,3786536dc18ef27aedcfa8e4c6953b48353eee79
syncthing,https://github.com/syncthing/syncthing.git,go,,,,,,go run build.go,,ba6ac2f604eb1cd27764460b687537c5e40aaaf8
github-readme-stats,https://github.com/anuraghazra/github-readme-stats.git,javascript,>=18,package-lock.json,npm,,,npm install .,,9a0d9ae2c17e007cbb8e9f32654941e1f0a8268e
prettier,https://github.com/prettier/prettier.git,javascript,>=18,"package.json,yarn.lock",yarn,4.5.0,,corepack enable yarn;yarn install,,9cf9079f75a30f1088529e0cae6296aeb71205ba
astro,https://github.com/withastro/astro.git,javascript,>=18.17.1,"package.json,pnpm-lock.yaml",pnpm,9.12.1,,pnpm install,,9d6bcdb88fcb9df0c5c70e2b591bcf962ce55f63
plantuml,https://github.com/plantuml/plantuml.git,java8,8,,gradle,,sdk use java 8.0.432-tem,gradle clean build -x javaDoc -PjavacRelease=8,GRADLE_ARGS='clean build -x javaDoc -PjavacRelease=8',8eb791f39478778788fd47a9195dc1b2feb3eade
plantuml_17,https://github.com/plantuml/plantuml.git,java17,17,,gradle,,,gradle clean build -x javaDoc -PjavacRelease=17,GRADLE_ARGS='clean build -x javaDoc -PjavacRelease=17',8eb791f39478778788fd47a9195dc1b2feb3eade
jazzer,https://github.com/CodeIntelligenceTesting/jazzer.git,java8,8,,bazelisk,7.3.0,sdk use java 8.0.432-tem,bazelisk build //:jazzer_release,BAZEL_CMD='bazelisk' BAZEL_TARGET=//:jazzer_release,3947707d7db7e5cae0c8cfaeb10bdfeb06fc32bb
tinydb,https://github.com/msiemens/tinydb.git,python,"3.9.20,3.10.15,3.11.10,3.12.7","pyproject.toml,poetry.lock",poetry,,,poetry install,,10644a0e07ad180c5b756aba272ee6b0dbd12df8
funcy,https://github.com/Suor/funcy.git,python,"3.9.20,3.10.15",setup.py,pip,,rm test_requirements.txt,python setup.py install,,859056d039adea75c1c3550286437ce0b612fe92
numpy,https://github.com/numpy/numpy.git,python-c,"3.9.20,3.10.15,3.11.10,3.12.7","setup.py,pyproject.toml",pip,,git submodule update --init,,,93fdebfcb4bc4cd53c959ccd0117a612d5f13f1a
requests,https://github.com/psf/requests.git,python,"3.9.20,3.10.15,3.11.10,3.12.7","setup.py",,,,pip install .,,23540c93cac97c763fe59e843a08fa2825aa80fd

0 comments on commit 5abfcd5

Please sign in to comment.