From b840dd3794c49ffe6a156c5fff55f24aee4ede46 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Sun, 27 Oct 2024 15:41:33 +0200 Subject: [PATCH] Streamlined project a bit --- .github/workflows/pr.yaml | 10 +-- .github/workflows/release.yaml | 6 +- README.md | 19 +---- build.sbt | 33 +++----- install-local-joern.sh | 76 ------------------- joern | 1 - joern-benchmarks-datasets | 2 +- project/DownloadHelper.scala | 48 ------------ project/Projects.scala | 7 -- project/Versions.scala | 31 ++------ project/plugins.sbt | 1 - repl | 1 - .../datasets/BenchmarkDataset.scala | 17 ++--- .../datasets/BenchmarkDatasetConfig.scala | 12 +-- .../io/joern/benchmarks/datasets/Main.scala | 7 +- .../joern/benchmarks/datasets/ReplMain.scala | 21 ----- .../joern/benchmarks/datasets/package.scala | 3 - .../datasets/runner/DatasetDownloader.scala | 7 +- .../datasets/runner/Defects4jDownloader.scala | 72 ++++++++++++++++++ .../runner/SecuribenchMicroDownloader.scala | 3 +- .../datasets/runner/ThoratDownloader.scala | 4 +- .../benchmarks/datasets/runner/package.scala | 28 +++++++ 22 files changed, 146 insertions(+), 263 deletions(-) delete mode 100755 install-local-joern.sh delete mode 120000 joern delete mode 100644 project/DownloadHelper.scala delete mode 100644 project/Projects.scala delete mode 120000 repl delete mode 100644 src/main/scala/io/joern/benchmarks/datasets/ReplMain.scala delete mode 100644 src/main/scala/io/joern/benchmarks/datasets/package.scala create mode 100644 src/main/scala/io/joern/benchmarks/datasets/runner/Defects4jDownloader.scala create mode 100644 src/main/scala/io/joern/benchmarks/datasets/runner/package.scala diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b05a6dd..4871213 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -1,7 +1,7 @@ name: pr on: pull_request jobs: - test: + compile: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -18,8 +18,8 @@ jobs: ~/.sbt ~/.coursier key: ${{ runner.os 
}}-sbt-${{ hashfiles('**/build.sbt') }} - - name: Compile and run tests - run: sbt clean test + - name: Ensure successful compilation + run: sbt compile formatting: runs-on: ubuntu-latest steps: @@ -38,6 +38,6 @@ jobs: ~/.coursier key: ${{ runner.os }}-sbt-${{ hashfiles('**/build.sbt') }} - name: Check formatting - run: sbt scalafmtCheck Test/scalafmtCheck - - run: echo "Previous step failed because code is not formatted. Run 'sbt scalafmt Test/scalafmt'" + run: sbt scalafmtCheck + - run: echo "Previous step failed because code is not formatted. Run 'sbt scalafmt'" if: ${{ failure() }} \ No newline at end of file diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c4858d5..2e4e004 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -44,7 +44,9 @@ jobs: mv workspace/ichnaea.zip ichnaea.zip mv workspace/securibench-micro-JAVA.zip securibench-micro-JAVA.zip mv workspace/securibench-micro-JAVASRC.zip securibench-micro-JAVASRC.zip - mv workspace/THORAT.zip THORAT.zip + mv workspace/thorat.zip thorat.zip + mv workspace/defects4j.zip defects4j.zip + mv workspace/bugs_in_py.zip bugs_in_py.zip - name: Set next release version id: taggerFinal uses: anothrNick/github-tag-action@1.61.0 @@ -59,4 +61,4 @@ jobs: ichnaea.zip securibench-micro-JAVA.zip securibench-micro-JAVASRC.zip - THORAT.zip \ No newline at end of file + thorat.zip \ No newline at end of file diff --git a/README.md b/README.md index cc0c591..b88c9e0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Joern Benchmarks ================ -A repository for running Joern against known benchmarks. +A repository for building snapshots of datasets used by `joern-benchmarks`. ## Usage @@ -11,21 +11,10 @@ sbt stage joern-benchmark v0.0.1 Usage: joern-benchmark [options] benchmark -A benchmarking suite for Joern +A benchmark downloader tool for Joern benchmarks -h, --help --version Prints the version - benchmark The benchmark to run. 
Available [ALL,OWASP_JAVASRC,OWASP_JAVA,SECURIBENCH_MICRO_JAVASRC,SECURIBENCH_MICRO_JAVA] + benchmark The benchmark to download. Available [ALL,SECURIBENCH_MICRO_SRC,SECURIBENCH_MICRO_JAVA,ICHNAEA,THORAT,BUGS_IN_PY,DEFECTS4J] -d, --dataset-dir - The dataset directory where benchmarks will be downloaded to. Default is `./workspace`. + The dataset directory where benchmarks will be downloaded to. Default is `./workspace` ``` - -## Benchmarks - -The benchmark naming convention of `_`, e.g. `OWASP_JAVA` runs `OWASP` using the `jimple2cpg` -frontend (JVM bytecode). - -| Benchmark | Status | Enabled Frontends | -|-----------------------------------------------------------------------|--------|-------------------| -| [`OWASP`](https://owasp.org/www-project-benchmark/) | WIP | `JAVASRC` | -| [`SECURIBENCH_MICRO`](https://github.com/too4words/securibench-micro) | WIP | `JAVASRC` `JAVA` | -| [`ICHNAEA`](https://www.franktip.org/pubs/tse2020.pdf) | WIP | `JSSRC` | diff --git a/build.sbt b/build.sbt index 736e37f..c6da5a4 100644 --- a/build.sbt +++ b/build.sbt @@ -1,32 +1,21 @@ -name := "joern-benchmarks-datasets-datasets" +name := "joern-benchmarks-datasets" ThisBuild / organization := "io.joern" ThisBuild / scalaVersion := "3.4.1" -// parsed by project/Versions.scala, updated by updateDependencies.sh -val cpgVersion = "1.6.11" -val joernVersion = "2.0.348" -val overflowdbVersion = "1.192" - - libraryDependencies ++= Seq( - "com.github.pathikrit" %% "better-files" % Versions.betterFiles, - "com.github.scopt" %% "scopt" % Versions.scopt, + "com.github.pathikrit" %% "better-files" % Versions.betterFiles, + "com.github.scopt" %% "scopt" % Versions.scopt, "org.apache.logging.log4j" % "log4j-slf4j2-impl" % Versions.log4j % Optional, - "com.lihaoyi" %% "requests" % Versions.requests, - "com.lihaoyi" %% "upickle" % Versions.upickle, - "io.joern" %% "joern-cli" % Versions.joern, - "io.joern" %% "x2cpg" % Versions.joern + "com.lihaoyi" %% "requests" % Versions.requests, +
"com.lihaoyi" %% "upickle" % Versions.upickle, + "com.github.sh4869" %% "semver-parser-scala" % Versions.semver, + "org.apache.commons" % "commons-compress" % Versions.commonsCompress ) -// mostly so that `sbt assembly` works, but also to ensure that we don't end up -// with unexpected shadowing in jar hell -excludeDependencies ++= Seq(ExclusionRule("io.shiftleft", "codepropertygraph-domain-classes_3")) - assembly / assemblyMergeStrategy := { - case "log4j2.xml" => MergeStrategy.first - case "module-info.class" => MergeStrategy.first - case "META-INF/versions/9/module-info.class" => MergeStrategy.first - case "io/github/retronym/java9rtexport/Export.class" => MergeStrategy.first + case "log4j2.xml" => MergeStrategy.first + case "module-info.class" => MergeStrategy.first + case "META-INF/versions/9/module-info.class" => MergeStrategy.first case PathList("scala", "collection", "internal", "pprint", _) => MergeStrategy.first case x => val oldStrategy = (ThisBuild / assemblyMergeStrategy).value @@ -48,5 +37,5 @@ ThisBuild / resolvers ++= Seq( "Gradle Releases" at "https://repo.gradle.org/gradle/libs-releases/" ) -Compile / doc / sources := Seq.empty +Compile / doc / sources := Seq.empty Compile / packageDoc / publishArtifact := false diff --git a/install-local-joern.sh b/install-local-joern.sh deleted file mode 100755 index b2baf3c..0000000 --- a/install-local-joern.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash -set -o errexit -set -o pipefail -set -o nounset - -# extract joern_version from build.sbt - parsing just like in project/Versions.scala -readonly JOERN_VERSION=$(grep 'val joernVersion = "' build.sbt | sed 's/.*"\(.*\)"/\1/') - -# get script location, use as a root dir for this script -if [ "$(uname)" = 'Darwin' ]; then - # https://unix.stackexchange.com/a/96238 - if [ "${BASH_SOURCE:-x}" != 'x' ]; then - this_script=$BASH_SOURCE - elif [ "${ZSH_VERSION:-x}" != 'x' ]; then - setopt function_argzero - this_script=$0 - elif eval '[[ -n ${.sh.file} ]]' 
2>/dev/null; then - eval 'this_script=${.sh.file}' - else - echo 1>&2 "Unsupported shell. Please use bash, ksh93 or zsh." - exit 2 - fi - relative_directory=$(dirname "$this_script") - SCRIPT_ABS_DIR=$(cd "$relative_directory" && pwd) -else - SCRIPT_ABS_PATH=$(readlink -f "$0") - SCRIPT_ABS_DIR=$(dirname "$SCRIPT_ABS_PATH") -fi - -# Check required tools are installed. -check_installed() { - if ! type "$1" > /dev/null; then - echo "Please ensure you have $1 installed." - exit 1 - fi -} - -readonly JOERN_INSTALL="$SCRIPT_ABS_DIR/joern-inst" - -if [ -d "${JOERN_INSTALL}" ]; then - echo "found existing local joern installation in $JOERN_INSTALL" - echo "should we wipe it and start fresh? [y/N]" - read ANSWER - if [ ! -z $ANSWER ]; then - if [ "y" == $ANSWER ] || [ "Y" == $ANSWER ]; then - rm -rf "$JOERN_INSTALL" - fi - fi -fi - -if [ ! -d "${JOERN_INSTALL}" ]; then - echo "downloading and installing joern $JOERN_VERSION..." - check_installed "curl" - - # Fetch installer - echo "https://github.com/ShiftLeftSecurity/joern/releases/download/v$JOERN_VERSION/joern-install.sh" - curl -L "https://github.com/ShiftLeftSecurity/joern/releases/download/v$JOERN_VERSION/joern-install.sh" -o "$SCRIPT_ABS_DIR/joern-install.sh" - - # Install into `joern-inst` - chmod +x $SCRIPT_ABS_DIR/joern-install.sh - $SCRIPT_ABS_DIR/joern-install.sh --install-dir="$JOERN_INSTALL" --version=v$JOERN_VERSION --without-plugins - rm $SCRIPT_ABS_DIR/joern-install.sh - rm joern-cli.zip -fi - -readonly JAR_INSTALL_DIR=${JOERN_INSTALL}/joern-cli/lib/ - -echo "Building extension" -sbt clean stage - -echo "Installing jars into: ${JAR_INSTALL_DIR}" -rm ${JAR_INSTALL_DIR}/io.shiftleft.codepropertygraph-domain-classes* -cp target/universal/stage/lib/org.codeminers.standalone-* ${JAR_INSTALL_DIR} -cp target/universal/stage/lib/org.codeminers.*domain* ${JAR_INSTALL_DIR} - -echo "All done, you're ready to go in $JOERN_INSTALL" diff --git a/joern b/joern deleted file mode 120000 index 7df6070..0000000 --- a/joern 
+++ /dev/null @@ -1 +0,0 @@ -joern-inst/joern-cli/joern \ No newline at end of file diff --git a/joern-benchmarks-datasets b/joern-benchmarks-datasets index 5921710..e18ae71 120000 --- a/joern-benchmarks-datasets +++ b/joern-benchmarks-datasets @@ -1 +1 @@ -target/universal/stage/bin/main \ No newline at end of file +target/universal/stage/bin/joern-benchmarks-datasets \ No newline at end of file diff --git a/project/DownloadHelper.scala b/project/DownloadHelper.scala deleted file mode 100644 index 9dba958..0000000 --- a/project/DownloadHelper.scala +++ /dev/null @@ -1,48 +0,0 @@ -import java.io.File -import java.net.URI -import java.nio.file.{Files, Path, Paths} - -object DownloadHelper { - val LocalStorageDir = Paths.get(".local/source-urls") - - /** Downloads the remote file from the given url if either - * - the localFile is not available, - * - or the url is different from the previously downloaded file - * - or we don't have the original url from the previously downloaded file - * We store the information about the previously downloaded urls and the localFile in `.local` - */ - def ensureIsAvailable(url: String, localFile: File): Unit = { - if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) { - val localPath = localFile.toPath - Files.deleteIfExists(localPath) - - println(s"[INFO] downloading $url to $localFile") - sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream => - sbt.IO.transfer(inputStream, localFile) - } - - // persist url in local storage - val storageFile = storageInfoFileFor(localFile) - Files.createDirectories(storageFile.getParent) - Files.writeString(storageFile, url) - } - } - - private def relativePathToProjectRoot(path: Path): String = - Paths - .get("") - .toAbsolutePath - .normalize() - .relativize(path.toAbsolutePath) - .toString - - private def previousUrlForLocalFile(localFile: File): Option[String] = { - Option(storageInfoFileFor(localFile)) - .filter(Files.exists(_)) - .map(Files.readString) - 
.filter(_.nonEmpty) - } - - private def storageInfoFileFor(localFile: File): Path = - LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath)) -} diff --git a/project/Projects.scala b/project/Projects.scala deleted file mode 100644 index ad9e91a..0000000 --- a/project/Projects.scala +++ /dev/null @@ -1,7 +0,0 @@ -import sbt.* - -object Projects { - lazy val schema = project.in(file("schema")) - lazy val domainClasses = project.in(file("domain-classes")) - lazy val schemaExtender = project.in(file("schema-extender")) -} diff --git a/project/Versions.scala b/project/Versions.scala index 35ab31d..bf6e8fc 100644 --- a/project/Versions.scala +++ b/project/Versions.scala @@ -1,28 +1,9 @@ object Versions { - val cpg = parseVersion("cpgVersion") - val joern = parseVersion("joernVersion") - val overflowdb = parseVersion("overflowdbVersion") - val betterFiles = "3.9.2" - val log4j = "2.20.0" - val requests = "0.8.0" - val scopt = "4.1.0" - val upickle = "3.3.0" - - val jsAstGen = "3.14.0" - - private def parseVersion(key: String): String = { - val versionRegexp = s""".*val $key[ ]+=[ ]?"(.*?)"""".r - val versions: List[String] = scala.io.Source - .fromFile("build.sbt") - .getLines - .filter(_.contains(s"val $key")) - .collect { case versionRegexp(version) => version } - .toList - assert( - versions.size == 1, - s"""unable to extract $key from build.sbt, expected exactly one line like `val $key= "0.0.0-SNAPSHOT"`.""" - ) - versions.head - } + val commonsCompress = "1.27.1" + val log4j = "2.20.0" + val requests = "0.8.0" + val scopt = "4.1.0" + val upickle = "3.3.0" + val semver ="0.0.6" } diff --git a/project/plugins.sbt b/project/plugins.sbt index 769be48..ec9bfc3 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,5 +2,4 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3") addSbtPlugin("com.github.sbt" % "sbt-findbugs" % "2.0.0") addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1") addSbtPlugin("com.github.sbt" % "sbt-native-packager" % 
"1.9.7") -addSbtPlugin("io.shiftleft" % "sbt-overflowdb" % "2.104") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1") diff --git a/repl b/repl deleted file mode 120000 index 9289d91..0000000 --- a/repl +++ /dev/null @@ -1 +0,0 @@ -target/universal/stage/bin/repl-main \ No newline at end of file diff --git a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala index 452a334..51c7769 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala @@ -5,6 +5,7 @@ import io.joern.benchmarks.datasets.AvailableBenchmarks import io.joern.benchmarks.datasets.runner.{ BugsInPyDownloader, DatasetDownloader, + Defects4jDownloader, IchnaeaDownloader, SecuribenchMicroDownloader, ThoratDownloader @@ -12,19 +13,14 @@ import io.joern.benchmarks.datasets.runner.{ import org.slf4j.LoggerFactory import upickle.default.* -/** The main benchmarking process. 
- */ class BenchmarkDataset(config: BenchmarkDatasetConfig) { private val logger = LoggerFactory.getLogger(getClass) def evaluate(): Unit = { - logger.info("Beginning evaluation") + logger.info("Beginning downloads") def runBenchmark(benchmarkRunnerCreator: BenchmarkDatasetConfig => DatasetDownloader): Unit = { - val benchmarkRunner = benchmarkRunnerCreator(config) - val benchmarkName = benchmarkRunner.benchmarkName - logger.info(s"Running $benchmarkName") - benchmarkRunner.run() + benchmarkRunnerCreator(config).run() } if (config.benchmark == AvailableBenchmarks.ALL) { @@ -42,9 +38,10 @@ object BenchmarkDataset { x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVASRC) ), (AvailableBenchmarks.SECURIBENCH_MICRO_JAVA, x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA)), - (AvailableBenchmarks.ICHNAEA_JSSRC, x => new IchnaeaDownloader(x.datasetDir)), - (AvailableBenchmarks.THORAT_PYSRC, x => new ThoratDownloader(x.datasetDir)), - (AvailableBenchmarks.BUGS_IN_PY, x => new BugsInPyDownloader(x.datasetDir)) + (AvailableBenchmarks.ICHNAEA, x => new IchnaeaDownloader(x.datasetDir)), + (AvailableBenchmarks.THORAT, x => new ThoratDownloader(x.datasetDir)), + (AvailableBenchmarks.BUGS_IN_PY, x => new BugsInPyDownloader(x.datasetDir)), + (AvailableBenchmarks.DEFECTS4J, x => new Defects4jDownloader(x.datasetDir)) ) } diff --git a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala index 7f48f99..984d00e 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala @@ -13,19 +13,13 @@ object AvailableBenchmarks extends Enumeration { // Joern val SECURIBENCH_MICRO_SRC = Value val SECURIBENCH_MICRO_JAVA = Value - val ICHNAEA_JSSRC = Value - val THORAT_PYSRC = Value + val ICHNAEA = Value + val THORAT = Value val BUGS_IN_PY = Value + val DEFECTS4J = 
Value } object JavaCpgTypes extends Enumeration { val JAVASRC = Value val JAVA = Value - val SEMGREP = Value -} - -object OutputFormat extends Enumeration { - val JSON = Value - val CSV = Value - val MD = Value } diff --git a/src/main/scala/io/joern/benchmarks/datasets/Main.scala b/src/main/scala/io/joern/benchmarks/datasets/Main.scala index a4c191b..affd209 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/Main.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/Main.scala @@ -9,21 +9,16 @@ import scala.util.{Failure, Success} /** Example program that makes use of Joern as a library */ object Main { - private val logger = LoggerFactory.getLogger(getClass) - def main(args: Array[String]): Unit = { optionParser.parse(args, BenchmarkDatasetConfig()).map(BenchmarkDataset(_)).foreach(_.evaluate()) } private val optionParser: OptionParser[BenchmarkDatasetConfig] = - new OptionParser[BenchmarkDatasetConfig]("joern-benchmark") { + new OptionParser[BenchmarkDatasetConfig]("joern-benchmark-datasets") { implicit val availableBenchmarksRead: scopt.Read[AvailableBenchmarks.Value] = scopt.Read.reads(AvailableBenchmarks withName _) - implicit val outputFormatRead: scopt.Read[OutputFormat.Value] = - scopt.Read.reads(OutputFormat withName _) - implicit val betterFilesRead: scopt.Read[File] = scopt.Read.reads(File.apply(_)) diff --git a/src/main/scala/io/joern/benchmarks/datasets/ReplMain.scala b/src/main/scala/io/joern/benchmarks/datasets/ReplMain.scala deleted file mode 100644 index 7b8ea25..0000000 --- a/src/main/scala/io/joern/benchmarks/datasets/ReplMain.scala +++ /dev/null @@ -1,21 +0,0 @@ -package io.joern.benchmarks.datasets - -import io.joern.console.BridgeBase -import io.joern.joerncli.console.Predefined - -/** Extend/use joern as a REPL application */ -object ReplMain extends BridgeBase { - - def main(args: Array[String]): Unit = { - run(parseConfig(args)) - } - - override protected def predefLines = { - Predefined.forInteractiveShell ++ Seq(s"import 
_root_.${getClass.getPackageName}.*") - } - - override protected def promptStr = "benchmark-repl" - override protected def greeting = "Welcome to the benchmark REPL!" - override protected def onExitCode = """println("goodbye!")""" - override def applicationName = "benchmarks-dataflowengineoss" -} diff --git a/src/main/scala/io/joern/benchmarks/datasets/package.scala b/src/main/scala/io/joern/benchmarks/datasets/package.scala deleted file mode 100644 index 5998d98..0000000 --- a/src/main/scala/io/joern/benchmarks/datasets/package.scala +++ /dev/null @@ -1,3 +0,0 @@ -package io.joern.benchmarks - -package object datasets {} diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/DatasetDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/DatasetDownloader.scala index d709bf6..ce49996 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/runner/DatasetDownloader.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/DatasetDownloader.scala @@ -13,13 +13,8 @@ trait DatasetDownloader(protected val datasetDir: File) { val benchmarkName: String - /** Create and setup the benchmark if necessary. - * - * @return - * the directory where the benchmark is set up if successful. 
- */ protected def initialize(): Try[File] def run(): Unit -// + } diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/Defects4jDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/Defects4jDownloader.scala new file mode 100644 index 0000000..84d3ea0 --- /dev/null +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/Defects4jDownloader.scala @@ -0,0 +1,72 @@ +package io.joern.benchmarks.datasets.runner + +import better.files.File +import org.slf4j.LoggerFactory + +import java.io.IOException +import java.net.{URI, URL} +import java.nio.file.attribute.BasicFileAttributes +import java.nio.file.* +import scala.jdk.CollectionConverters.IteratorHasAsScala +import scala.util.{Failure, Success, Try} + +class Defects4jDownloader(datasetDir: File) extends DatasetDownloader(datasetDir) with MultiFileDownloader { + + private val logger = LoggerFactory.getLogger(getClass) + + override val benchmarkName = s"Defects4j" + + private val packageDetails: Seq[(String, String)] = Seq( + "Chart" -> "https://repo1.maven.org/maven2/org/jfree/jfreechart/1.5.5/jfreechart-1.5.5.jar", + "Cli" -> "https://repo1.maven.org/maven2/commons-cli/commons-cli/1.8.0/commons-cli-1.8.0.jar", + "Closure" -> "https://repo1.maven.org/maven2/com/google/javascript/closure-compiler/v20240317/closure-compiler-v20240317.jar", + "Codec" -> "https://repo1.maven.org/maven2/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar", + "Collections" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-collections4/4.4/commons-collections4-4.4.jar", + "Compress" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar", + "Csv" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-csv/1.11.0/commons-csv-1.11.0.jar", + "Gson" -> "https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar", + "JacksonCore" -> 
"https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-core/2.17.2/jackson-core-2.17.2.jar", + "JacksonDatabind" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/core/jackson-databind/2.17.2/jackson-databind-2.17.2.jar", + "JacksonXml" -> "https://repo1.maven.org/maven2/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/2.17.2/jackson-dataformat-xml-2.17.2.jar", + "Jsoup" -> "https://repo1.maven.org/maven2/org/jsoup/jsoup/1.18.1/jsoup-1.18.1.jar", + "JxPath" -> "https://repo1.maven.org/maven2/commons-jxpath/commons-jxpath/1.3/commons-jxpath-1.3.jar", + "Lang" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-lang3/3.14.0/commons-lang3-3.14.0.jar", + "Math" -> "https://repo1.maven.org/maven2/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar", + "Mockito" -> "https://repo1.maven.org/maven2/org/mockito/mockito-core/5.12.0/mockito-core-5.12.0.jar", + "Time" -> "https://repo1.maven.org/maven2/joda-time/joda-time/2.12.7/joda-time-2.12.7.jar" + ) + + /** The URL to the archive. + */ + override protected val benchmarkUrls: Map[String, URL] = packageDetails.map { case (name, urlString) => + name -> URI(urlString).toURL + }.toMap + + /** The name of the benchmark directory to download all benchmark components to. + */ + override protected val benchmarkDirName: String = "defects4j" + + /** The name of the benchmark directory. 
+ */ + override protected val benchmarkBaseDir: File = datasetDir / benchmarkDirName + + override def initialize(): Try[File] = Try { + val downloadedDir = downloadBenchmarkAndUnarchive(CompressionTypes.ZIP) match { + case Success(dir) => + dir + case Failure(e) => throw e + } + + compressBenchmark(downloadedDir) + } + + override def run(): Unit = { + initialize() match { + case Failure(exception) => + logger.error(s"Unable to initialize benchmark '$getClass'", exception) + case Success(benchmarkDir) => + logger.info(s"Finished downloading benchmark `$getClass``") + } + } + +} diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala index 674f48d..3ac2cf4 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/SecuribenchMicroDownloader.scala @@ -3,7 +3,6 @@ package io.joern.benchmarks.datasets.runner import better.files.File import io.joern.benchmarks.* import io.joern.benchmarks.datasets.JavaCpgTypes -import io.joern.x2cpg.utils.ExternalCommand import org.slf4j.LoggerFactory import java.net.{URI, URL} @@ -47,7 +46,7 @@ class SecuribenchMicroDownloader(datasetDir: File, cpgCreatorType: JavaCpgTypes. 
"classes", sourceFiles ).mkString(" ") - ExternalCommand.run(command, benchmarkBaseDir.pathAsString) match { + runCmd(command, benchmarkBaseDir.toJava).toTry match { case Failure(exception) => logger.error(s"Exception encountered while compiling source code with: '$command'") throw exception diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala index ebb5a92..c2a98da 100644 --- a/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/ThoratDownloader.scala @@ -22,11 +22,11 @@ class ThoratDownloader(datasetDir: File) extends DatasetDownloader(datasetDir) w override protected val benchmarkBaseDir: File = datasetDir / benchmarkFileName override def initialize(): Try[File] = Try { - val outputFile = File(s"${datasetDir.pathAsString}/THORAT.zip") + val outputFile = File(s"${datasetDir.pathAsString}/thorat.zip") if !outputFile.exists then downloadBenchmarkAndUnarchive(CompressionTypes.ZIP) - compressBenchmark(benchmarkBaseDir, Option(File(s"${datasetDir.pathAsString}/THORAT.zip"))) + compressBenchmark(benchmarkBaseDir, Option(File(s"${datasetDir.pathAsString}/thorat.zip"))) else outputFile } diff --git a/src/main/scala/io/joern/benchmarks/datasets/runner/package.scala b/src/main/scala/io/joern/benchmarks/datasets/runner/package.scala new file mode 100644 index 0000000..cdf9107 --- /dev/null +++ b/src/main/scala/io/joern/benchmarks/datasets/runner/package.scala @@ -0,0 +1,28 @@ +package io.joern.benchmarks.datasets + +import java.io.File +import scala.sys.process.{Process, ProcessLogger} +import scala.util.{Failure, Success, Try} + +package object runner { + + def runCmd(in: String, cwd: File): RunOutput = { + val qb = Process(in, cwd) + var out = List[String]() + var err = List[String]() + + val exit = qb ! 
ProcessLogger(s => out ::= s, s => err ::= s) + + RunOutput(exit, out.reverse, err.reverse) + } + + case class RunOutput(exitCode: Int, stdOut: List[String], stdErr: List[String]) { + def toTry: Try[List[String]] = { + exitCode match { + case 0 => Success(stdOut) + case _ => Failure(new RuntimeException(stdErr.mkString("\n"))) + } + } + } + +}