From 0ad0ede0135b0be998036adf29b8e0da0514973d Mon Sep 17 00:00:00 2001 From: Andrei Dreyer Date: Wed, 12 Jun 2024 15:47:11 +0200 Subject: [PATCH] [datasets] added semgrep downloader --- .../joern/benchmarks/datasets/BenchmarkDataset.scala | 9 +++++++-- .../benchmarks/datasets/BenchmarkDatasetConfig.scala | 12 +++++++++++- .../datasets/runner/OWASPJavaDownloader.scala | 7 ++++++- .../datasets/runner/SecuribenchMicroDownloader.scala | 9 +++++---- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala index 9eccddf..c478c74 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala @@ -35,7 +35,6 @@ class BenchmarkDataset(config: BenchmarkDatasetConfig) { } object BenchmarkDataset { - val benchmarkConstructors: Map[AvailableBenchmarks.Value, BenchmarkDatasetConfig => DatasetDownloader] = Map( // TODO: Add when implementing (AvailableBenchmarks.OWASP_JAVASRC, x => new OWASPJavaDownloader(x.datasetDir, JavaCpgTypes.JAVASRC)), @@ -45,7 +44,13 @@ object BenchmarkDataset { x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVASRC) ), (AvailableBenchmarks.SECURIBENCH_MICRO_JAVA, x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA)), - (AvailableBenchmarks.ICHNAEA_JSSRC, x => new IchnaeaDownloader(x.datasetDir)) + (AvailableBenchmarks.ICHNAEA_JSSRC, x => new IchnaeaDownloader(x.datasetDir)), + ( + AvailableBenchmarks.SECURIBENCH_MICRO_SEMGREP, + x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.SEMGREP) + ), + (AvailableBenchmarks.OWASP_SEMGREP, x => new OWASPJavaDownloader(x.datasetDir, JavaCpgTypes.SEMGREP)), + (AvailableBenchmarks.ICHNAEA_SEMGREP, x => new IchnaeaDownloader(x.datasetDir)) ) } diff --git a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala index 28fc567..5c28c7d 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala @@ -8,17 +8,27 @@ case class BenchmarkDatasetConfig( ) object AvailableBenchmarks extends Enumeration { - val ALL = Value + val ALL = Value + + // Joern val OWASP_JAVASRC = Value val OWASP_JAVA = Value val SECURIBENCH_MICRO_JAVASRC = Value val SECURIBENCH_MICRO_JAVA = Value val ICHNAEA_JSSRC = Value + val THORAT_PYSRC = Value + + // Semgrep + val OWASP_SEMGREP = Value + val SECURIBENCH_MICRO_SEMGREP = Value + val THORAT_SEMGREP = Value + val ICHNAEA_SEMGREP = Value } object JavaCpgTypes extends Enumeration { val JAVASRC = Value val JAVA = Value + val SEMGREP = Value } object OutputFormat extends Enumeration { diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/OWASPJavaDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/OWASPJavaDownloader.scala index 9a05644..ac4d221 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/runner/OWASPJavaDownloader.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/OWASPJavaDownloader.scala @@ -26,9 +26,14 @@ class OWASPJavaDownloader(datasetDir: File, cpgCreatorType: JavaCpgTypes.Value) override def initialize(): Try[File] = Try { downloadBenchmarkAndUnarchive(CompressionTypes.ZIP) + + val datasetLabel = + if cpgCreatorType == JavaCpgTypes.JAVA then JavaCpgTypes.JAVA.toString + else JavaCpgTypes.JAVASRC.toString + compressBenchmark( benchmarkBaseDir, - Option(File(s"${datasetDir.pathAsString}/OWASP-BenchmarkJava-$cpgCreatorType.zip")) + Option(File(s"${datasetDir.pathAsString}/OWASP-BenchmarkJava-$datasetLabel.zip")) ) } diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala index b601c6d..674f48d 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala @@ -55,10 +55,11 @@ class SecuribenchMicroDownloader(datasetDir: File, cpgCreatorType: JavaCpgTypes. } } - compressBenchmark( - benchmarkBaseDir, - Option(File(s"${datasetDir.pathAsString}/securibench-micro-${cpgCreatorType.toString}.zip")) - ) + val datasetLabel = + if cpgCreatorType == JavaCpgTypes.JAVA then JavaCpgTypes.JAVA.toString + else JavaCpgTypes.JAVASRC.toString + + compressBenchmark(benchmarkBaseDir, Option(File(s"${datasetDir.pathAsString}/securibench-micro-$datasetLabel.zip"))) } override def run(): Unit = {