From 01ee1865c16d9cebad24d55e9129ea7f00ed8bfa Mon Sep 17 00:00:00 2001
From: Andrei Dreyer
Date: Wed, 12 Jun 2024 16:16:58 +0200
Subject: [PATCH] [datasets] added THORAT for PYSRC and SEMGREP

---
Review notes (ignored by `git am`): ThoratDownloader.run() no longer wraps its body in a discarded `Try`,
initialize() builds the THORAT.zip path once, release.yaml keeps its trailing newline, and Scaladoc was added.
The `index` blob hashes below predate these edits.

 .github/workflows/release.yaml                |  2 +
 .../datasets/BenchmarkDataset.scala           |  7 +++-
 .../datasets/runner/ThoratDownloader.scala    | 55 +++++++++++++++++++
 3 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 88d71a3..52d394d 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -46,6 +46,7 @@ jobs:
           mv workspace/securibench-micro-JAVASRC.zip securibench-micro-JAVASRC.zip
           mv workspace/OWASP-BenchmarkJava-JAVA.zip OWASP-BenchmarkJava-JAVA.zip
           mv workspace/OWASP-BenchmarkJava-JAVASRC.zip OWASP-BenchmarkJava-JAVASRC.zip
+          mv workspace/THORAT.zip THORAT.zip
       - name: Set next release version
         id: taggerFinal
         uses: anothrNick/github-tag-action@1.61.0
@@ -62,3 +63,4 @@ jobs:
             securibench-micro-JAVASRC.zip
             OWASP-BenchmarkJava-JAVA.zip
             OWASP-BenchmarkJava-JAVASRC.zip
+            THORAT.zip
diff --git a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
index c478c74..bd8c8ad 100644
--- a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
+++ b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
@@ -6,7 +6,8 @@ import io.joern.benchmarks.datasets.runner.{
   DatasetDownloader,
   IchnaeaDownloader,
   OWASPJavaDownloader,
-  SecuribenchMicroDownloader
+  SecuribenchMicroDownloader,
+  ThoratDownloader
 }
 import org.slf4j.LoggerFactory
 import upickle.default.*
@@ -50,7 +51,9 @@ object BenchmarkDataset {
       x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.SEMGREP)
     ),
     (AvailableBenchmarks.OWASP_SEMGREP, x => new OWASPJavaDownloader(x.datasetDir, JavaCpgTypes.SEMGREP)),
-    (AvailableBenchmarks.ICHNAEA_SEMGREP, x => new IchnaeaDownloader(x.datasetDir))
+    (AvailableBenchmarks.ICHNAEA_SEMGREP, x => new IchnaeaDownloader(x.datasetDir)),
+    (AvailableBenchmarks.THORAT_PYSRC, x => new ThoratDownloader(x.datasetDir)),
+    (AvailableBenchmarks.THORAT_SEMGREP, x => new ThoratDownloader(x.datasetDir))
   )
 }
 
diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala
new file mode 100644
index 0000000..cfef000
--- /dev/null
+++ b/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala
@@ -0,0 +1,55 @@
+package io.joern.benchmarks.datasets.runner
+
+import better.files.File
+import io.joern.benchmarks.*
+import org.slf4j.LoggerFactory
+import upickle.default.*
+
+import java.net.{URI, URL}
+import scala.util.{Failure, Success, Try}
+
+/** Dataset downloader for the Thorat Python taint-analysis benchmark, pinned to the release tag given by `version`.
+  * The prepared benchmark is kept as `THORAT.zip` directly under `datasetDir`.
+  *
+  * @param datasetDir
+  *   directory under which `benchmarkBaseDir` and the `THORAT.zip` archive are located.
+  */
+class ThoratDownloader(datasetDir: File) extends DatasetDownloader(datasetDir) with SingleFileDownloader {
+
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  private val version = "0.0.7"
+  override val benchmarkName = s"Thorat Python v$version"
+
+  override protected val benchmarkUrl: URL = URI(
+    s"https://github.com/DavidBakerEffendi/benchmark-for-taint-analysis-tools-for-python/archive/refs/tags/v$version.zip"
+  ).toURL
+  override protected val benchmarkFileName: String = s"benchmark-for-taint-analysis-tools-for-python-$version"
+  override protected val benchmarkBaseDir: File = datasetDir / benchmarkFileName
+
+  /** Returns the `THORAT.zip` archive under `datasetDir`. If it does not exist yet, `downloadBenchmarkAndUnarchive`
+    * is called first and the result of `compressBenchmark` is returned instead.
+    *
+    * @return
+    *   the archive file, or a `Failure` holding whatever exception was thrown while preparing it.
+    */
+  override def initialize(): Try[File] = Try {
+    // Built once and reused for both the existence check and the compression target.
+    val outputFile = datasetDir / "THORAT.zip"
+
+    if !outputFile.exists then
+      downloadBenchmarkAndUnarchive(CompressionTypes.ZIP)
+      compressBenchmark(benchmarkBaseDir, Option(outputFile))
+    else outputFile
+  }
+
+  /** Prepares the dataset via [[initialize]] and logs an error if that fails. */
+  override def run(): Unit =
+    // `initialize()` already reports failures as a `Try`; wrapping this in another `Try` would only
+    // produce a value that is silently discarded.
+    initialize() match {
+      case Failure(exception) =>
+        logger.error(s"Unable to initialize benchmark '$getClass'", exception)
+      case Success(_) => // archive is in place; nothing further to do
+    }
+}