Skip to content

Commit

Permalink
Merge pull request #1 from joernio/andrei/ichnaea-downloader
Browse files Browse the repository at this point in the history
Dataset Downloader - Ichnaea
  • Loading branch information
AndreiDreyer authored May 21, 2024
2 parents 49bf2a0 + c4d5cbd commit c50eb25
Show file tree
Hide file tree
Showing 26 changed files with 931 additions and 3 deletions.
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
cpg.bin
target/
.idea/
/.bsp
/joern-inst
/workspace
/results
/bin
.local
5 changes: 5 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
version = 3.5.1
runner.dialect = scala3
preset = IntelliJ
maxColumn = 120
align.preset = true
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ A benchmarking suite for Joern
--version Prints the version
benchmark The benchmark to run. Available [ALL,OWASP_JAVASRC,OWASP_JAVA,SECURIBENCH_MICRO_JAVASRC,SECURIBENCH_MICRO_JAVA,ICHNAEA_JSSRC]
-d, --dataset-dir <value>
The dataset directory where benchmarks will be initialized and executed. Default is `./workspace`.
-o, --output <value> The output directory to write results to. Default is `./results`.
-f, --format <value> The output format to write results as. Default is JSON. Available [JSON,CSV,MD]
The dataset directory where benchmarks will be downloaded to. Default is `./workspace`.
```

## Benchmarks
Expand Down
52 changes: 52 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Project coordinates.
// NOTE(review): "-datasets" appears twice in the project name — confirm this is intended.
name := "joern-benchmarks-datasets-datasets"
ThisBuild / organization := "io.joern"
ThisBuild / scalaVersion := "3.4.1"

// parsed by project/Versions.scala, updated by updateDependencies.sh
// Keep each version on its own line in the form `val <key> = "<version>"`:
// project/Versions.scala asserts that exactly one such line exists per key.
val cpgVersion = "1.6.11"
val joernVersion = "2.0.348"
val overflowdbVersion = "1.192"


// Library dependencies; version numbers come from project/Versions.scala.
// The log4j SLF4J binding is declared in the Optional configuration.
libraryDependencies ++= Seq(
"com.github.pathikrit" %% "better-files" % Versions.betterFiles,
"com.github.scopt" %% "scopt" % Versions.scopt,
"org.apache.logging.log4j" % "log4j-slf4j2-impl" % Versions.log4j % Optional,
"com.lihaoyi" %% "requests" % Versions.requests,
"com.lihaoyi" %% "upickle" % Versions.upickle,
"io.joern" %% "joern-cli" % Versions.joern,
"io.joern" %% "x2cpg" % Versions.joern
)

// mostly so that `sbt assembly` works, but also to ensure that we don't end up
// with unexpected shadowing in jar hell
excludeDependencies ++= Seq(ExclusionRule("io.shiftleft", "codepropertygraph-domain-classes_3"))

// Duplicate-file resolution for `sbt assembly`: keep the first copy of the entries listed below
// and fall back to the previously configured strategy for every other path.
assembly / assemblyMergeStrategy := {
case "log4j2.xml" => MergeStrategy.first
case "module-info.class" => MergeStrategy.first
case "META-INF/versions/9/module-info.class" => MergeStrategy.first
case "io/github/retronym/java9rtexport/Export.class" => MergeStrategy.first
case PathList("scala", "collection", "internal", "pprint", _) => MergeStrategy.first
case x =>
val oldStrategy = (ThisBuild / assemblyMergeStrategy).value
oldStrategy(x)
}

// Compiler flags: report feature and deprecation warnings, and allow implicit conversions.
ThisBuild / Compile / scalacOptions ++= Seq("-feature", "-deprecation", "-language:implicitConversions")

// Application packaging; install-local-joern.sh runs `sbt clean stage` and reads target/universal/stage.
enablePlugins(JavaAppPackaging)

ThisBuild / licenses := List("Apache-2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0"))

// Reload the build automatically when a build source file changes.
Global / onChangedBuildSource := ReloadOnSourceChanges

// Additional artifact repositories; the local Maven repository is listed first.
ThisBuild / resolvers ++= Seq(
Resolver.mavenLocal,
"Sonatype OSS" at "https://oss.sonatype.org/content/repositories/public",
"Atlassian" at "https://packages.atlassian.com/mvn/maven-atlassian-external",
"Gradle Releases" at "https://repo.gradle.org/gradle/libs-releases/"
)

// No API documentation: feed the doc task no sources and do not publish a doc artifact.
Compile / doc / sources := Seq.empty
Compile / packageDoc / publishArtifact := false
76 changes: 76 additions & 0 deletions install-local-joern.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -o errexit
set -o pipefail
set -o nounset

# get script location, use as a root dir for this script
if [ "$(uname)" = 'Darwin' ]; then
  # macOS branch: derive the script path from the running shell instead of `readlink -f`.
  # https://unix.stackexchange.com/a/96238
  if [ "${BASH_SOURCE:-x}" != 'x' ]; then
    this_script=$BASH_SOURCE
  elif [ "${ZSH_VERSION:-x}" != 'x' ]; then
    setopt function_argzero
    this_script=$0
  elif eval '[[ -n ${.sh.file} ]]' 2>/dev/null; then
    eval 'this_script=${.sh.file}'
  else
    echo 1>&2 "Unsupported shell. Please use bash, ksh93 or zsh."
    exit 2
  fi
  relative_directory=$(dirname "$this_script")
  SCRIPT_ABS_DIR=$(cd "$relative_directory" && pwd)
else
  SCRIPT_ABS_PATH=$(readlink -f "$0")
  SCRIPT_ABS_DIR=$(dirname "$SCRIPT_ABS_PATH")
fi

# extract joern_version from build.sbt - parsing just like in project/Versions.scala
# The file is read from the script's own directory so the result does not depend on the
# caller's working directory. `|| true` keeps errexit/pipefail from aborting silently when
# nothing matches; the explicit check below reports the problem instead.
JOERN_VERSION=$(grep 'val joernVersion = "' "$SCRIPT_ABS_DIR/build.sbt" | sed 's/.*"\(.*\)"/\1/' || true)
if [ -z "$JOERN_VERSION" ]; then
  echo 1>&2 "Unable to extract joernVersion from $SCRIPT_ABS_DIR/build.sbt"
  exit 1
fi
readonly JOERN_VERSION

# Check required tools are installed.
# Arguments:
#   $1 - name of the command that must be resolvable by the shell.
# Exits the script with status 1, after printing a hint to stderr, when the command is missing.
check_installed() {
  # `command -v` with both streams silenced: only our own message is shown on failure.
  if ! command -v "$1" > /dev/null 2>&1; then
    echo 1>&2 "Please ensure you have $1 installed."
    exit 1
  fi
}

# Location of the local joern installation, next to this script.
readonly JOERN_INSTALL="$SCRIPT_ABS_DIR/joern-inst"

# Offer to wipe an existing installation; any answer other than y/Y keeps it.
if [ -d "${JOERN_INSTALL}" ]; then
  echo "found existing local joern installation in $JOERN_INSTALL"
  echo "should we wipe it and start fresh? [y/N]"
  ANSWER=""
  # `|| true`: at end-of-input (e.g. a non-interactive run) fall back to the default answer
  # instead of aborting under errexit.
  read -r ANSWER || true
  # Quoted `case` match: answers containing spaces or glob characters cannot break the test.
  case "$ANSWER" in
    y | Y) rm -rf "$JOERN_INSTALL" ;;
  esac
fi

# Download and run the joern installer when no local installation exists.
if [ ! -d "${JOERN_INSTALL}" ]; then
  echo "downloading and installing joern $JOERN_VERSION..."
  check_installed "curl"

  # Fetch installer
  installer_url="https://github.com/ShiftLeftSecurity/joern/releases/download/v$JOERN_VERSION/joern-install.sh"
  installer_path="$SCRIPT_ABS_DIR/joern-install.sh"
  echo "$installer_url"
  # --fail: stop on an HTTP error instead of saving the error page and executing it below.
  curl --fail -L "$installer_url" -o "$installer_path"

  # Install into `joern-inst`
  chmod +x "$installer_path"
  "$installer_path" --install-dir="$JOERN_INSTALL" --version="v$JOERN_VERSION" --without-plugins
  rm "$installer_path"
  # NOTE(review): presumably an archive the installer leaves in the current directory — confirm.
  # -f so that a missing archive does not abort the script under errexit.
  rm -f joern-cli.zip
fi

# Directory inside the local joern installation that receives the staged jars.
readonly JAR_INSTALL_DIR=${JOERN_INSTALL}/joern-cli/lib/

echo "Building extension"
# Runs in the caller's current directory; the relative target/ paths below assume the project root.
sbt clean stage

echo "Installing jars into: ${JAR_INSTALL_DIR}"
# Remove the domain-classes jar(s) from the installation before copying the staged jars in.
rm ${JAR_INSTALL_DIR}/io.shiftleft.codepropertygraph-domain-classes*
# NOTE(review): build.sbt sets the organization to io.joern, so these org.codeminers.* patterns
# look like template leftovers and may match nothing (which aborts under errexit) — confirm the
# names of the jars produced in target/universal/stage/lib.
cp target/universal/stage/lib/org.codeminers.standalone-* ${JAR_INSTALL_DIR}
cp target/universal/stage/lib/org.codeminers.*domain* ${JAR_INSTALL_DIR}

echo "All done, you're ready to go in $JOERN_INSTALL"
1 change: 1 addition & 0 deletions joern
1 change: 1 addition & 0 deletions joern-benchmarks-datasets
14 changes: 14 additions & 0 deletions log4j2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration with a single console appender writing to stderr. -->
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_ERR">
<!-- Layout: timestamp, level, logger name shortened via %c{0}, message. -->
<!-- NOTE(review): the date pattern uses `yyy`; `yyyy` was probably intended. Confirm. -->
<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %c{0}: %msg%n"/>
</Console>
</Appenders>
<Loggers>
<!-- io.shiftleft.overflowdb is configured at WARN, which is more verbose than the root level below. -->
<Logger name="io.shiftleft.overflowdb" level="warn" />
<!-- All other loggers: ERROR only, sent to the console appender. -->
<Root level="ERROR">
<AppenderRef ref="Console" />
</Root>
</Loggers>
</Configuration>
48 changes: 48 additions & 0 deletions project/DownloadHelper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import java.io.File
import java.net.URI
import java.nio.file.{Files, Path, Paths}

/** Build helper that downloads remote files on demand and records, per local file, the URL it was fetched from. */
object DownloadHelper {

  /** Directory holding one small text file per downloaded file, containing its source URL. */
  val LocalStorageDir: Path = Paths.get(".local/source-urls")

  /** Downloads the remote file from the given url if either
    *   - the localFile is not available,
    *   - or the url is different from the previously downloaded file
    *   - or we don't have the original url from the previously downloaded file
    * We store the information about the previously downloaded urls and the localFile in `.local`
    *
    * @param url
    *   remote location to fetch
    * @param localFile
    *   destination on disk; an existing file is deleted before downloading
    */
  def ensureIsAvailable(url: String, localFile: File): Unit = {
    if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) {
      val localPath   = localFile.toPath
      val storageFile = storageInfoFileFor(localFile)

      // Drop the old URL record before touching the file: if the transfer below fails midway,
      // the partial file must not be accepted as a complete download of the recorded URL next time.
      Files.deleteIfExists(storageFile)
      Files.deleteIfExists(localPath)

      println(s"[INFO] downloading $url to $localFile")
      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
        sbt.IO.transfer(inputStream, localFile)
      }

      // persist url in local storage, only once the transfer has completed
      Files.createDirectories(storageFile.getParent)
      Files.writeString(storageFile, url)
    }
  }

  /** Expresses `path` relative to the current working directory. */
  private def relativePathToProjectRoot(path: Path): String =
    Paths
      .get("")
      .toAbsolutePath
      .normalize()
      .relativize(path.toAbsolutePath)
      .toString

  /** The URL recorded for `localFile`, if a non-empty record exists. */
  private def previousUrlForLocalFile(localFile: File): Option[String] = {
    Option(storageInfoFileFor(localFile))
      .filter(Files.exists(_))
      .map(recordFile => Files.readString(recordFile))
      .filter(_.nonEmpty)
  }

  /** Location of the URL record belonging to `localFile`, mirroring its relative path under [[LocalStorageDir]]. */
  private def storageInfoFileFor(localFile: File): Path =
    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
}
7 changes: 7 additions & 0 deletions project/Projects.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import sbt.*

/** sbt project definitions, each rooted at a directory given relative to the build root. */
object Projects {
  lazy val schema         = project in file("schema")
  lazy val domainClasses  = project in file("domain-classes")
  lazy val schemaExtender = project in file("schema-extender")
}
28 changes: 28 additions & 0 deletions project/Versions.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/** Dependency versions used by the build.
  *
  * `cpg`, `joern` and `overflowdb` are read from `build.sbt` (resolved against the working directory) when this
  * object is initialised; the remaining versions are fixed here.
  */
object Versions {
  val cpg        = parseVersion("cpgVersion")
  val joern      = parseVersion("joernVersion")
  val overflowdb = parseVersion("overflowdbVersion")

  val betterFiles = "3.9.2"
  val log4j       = "2.20.0"
  val requests    = "0.8.0"
  val scopt       = "4.1.0"
  val upickle     = "3.3.0"

  val jsAstGen = "3.14.0"

  /** Extracts the quoted value of `val <key> = "..."` from `build.sbt`.
    *
    * @param key
    *   name of the `val` to look up
    * @return
    *   the version string between the quotes
    * @throws java.lang.AssertionError
    *   unless exactly one line of `build.sbt` matches
    */
  private def parseVersion(key: String): String = {
    val versionRegexp = s""".*val $key[ ]+=[ ]?"(.*?)"""".r
    val buildFile     = scala.io.Source.fromFile("build.sbt")
    val versions: List[String] =
      try {
        // `getLines()` is lazy: materialise the matches before the file is closed.
        buildFile
          .getLines()
          .filter(_.contains(s"val $key"))
          .collect { case versionRegexp(version) => version }
          .toList
      } finally {
        // Release the file handle; try/finally because this compiles as part of the sbt meta-build.
        buildFile.close()
      }
    assert(
      versions.size == 1,
      s"""unable to extract $key from build.sbt, expected exactly one line like `val $key= "0.0.0-SNAPSHOT"`."""
    )
    versions.head
  }
}
1 change: 1 addition & 0 deletions project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=1.9.9
4 changes: 4 additions & 0 deletions project/meta-build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Dependencies of the build definition itself (this file lives under project/).
// The version is written out here separately from `Versions.betterFiles`, which holds the same value.
libraryDependencies ++= Seq(
"com.github.pathikrit" %% "better-files" % "3.9.2",
)

6 changes: 6 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// sbt plugins for this build.
// Source formatting; a .scalafmt.conf is present at the repository root.
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
// NOTE(review): no use of this plugin is visible in build.sbt — confirm it is still needed.
addSbtPlugin("com.github.sbt" % "sbt-findbugs" % "2.0.0")
// NOTE(review): presumably supplies the project version (build.sbt sets none) — confirm.
addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1")
// Provides JavaAppPackaging, which build.sbt enables.
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.7")
// NOTE(review): no use of this plugin is visible in build.sbt — confirm it is still needed.
addSbtPlugin("io.shiftleft" % "sbt-overflowdb" % "2.104")
// Provides the `assembly` settings configured in build.sbt.
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")
1 change: 1 addition & 0 deletions repl
24 changes: 24 additions & 0 deletions src/main/resources/log4j2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j2 configuration for the application: two console appenders split by severity. -->
<Configuration status="INFO">
<Properties>
<!-- Shared layout: time, a one-character level marker, message. -->
<Property name="pattern">%d{HH:mm:ss} [%level{WARN=*, DEBUG=#, ERROR=!, TRACE=%, INFO=+}] %msg%n</Property>
</Properties>
<Appenders>
<!-- stdout: the filter denies events that match the ERROR threshold and accepts the rest. -->
<Console name="stdout" target="SYSTEM_OUT"> <!-- 1 -->
<PatternLayout pattern="${pattern}"/>
<ThresholdFilter level="ERROR" onMatch="DENY" onMismatch="ACCEPT"/> <!-- 3 -->
</Console>
<!-- stderr: the mirror image, accepting only events that match the ERROR threshold. -->
<Console name="stderr" target="SYSTEM_ERR"> <!-- 2 -->
<PatternLayout pattern="${pattern}"/>
<ThresholdFilter level="ERROR" onMatch="ACCEPT" onMismatch="DENY"/> <!-- 3 -->
</Console>
</Appenders>
<Loggers>
<Logger name="io.shiftleft.overflowdb" level="warn" />
<!-- NOTE(review): the Scala sources of this project live in io.joern.benchmarks.datasets, so this -->
<!-- logger name does not cover them and their INFO messages fall under the ERROR root level. -->
<!-- Confirm whether the name should be io.joern.benchmarks.datasets. -->
<Logger name="io.joern.benchmarks.dataflowengineoss" level="info" />
<Root level="ERROR">
<AppenderRef ref="stdout" />
<AppenderRef ref="stderr" />
</Root>
</Loggers>
</Configuration>
54 changes: 54 additions & 0 deletions src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package io.joern.benchmarks.datasets

import io.joern.benchmarks.datasets.BenchmarkDataset.benchmarkConstructors
import io.joern.benchmarks.datasets.AvailableBenchmarks
import io.joern.benchmarks.datasets.runner.{
DatasetDownloader,
IchnaeaDownloader

// TODO: Add when implementing

// OWASPJavaDownloader,
// SecuribenchMicroDownloader
}
import org.slf4j.LoggerFactory
import upickle.default.*

/** Entry point of the dataset process: creates the downloader(s) for the configured benchmark and runs them.
  *
  * @param config
  *   selects the benchmark and is handed to each downloader factory
  */
class BenchmarkDataset(config: BenchmarkDatasetConfig) {
  private val logger = LoggerFactory.getLogger(getClass)

  /** Runs every registered downloader when the benchmark is [[AvailableBenchmarks.ALL]], otherwise only the one
    * registered for the configured benchmark. A benchmark without a registered downloader is reported as an error
    * instead of being skipped silently.
    */
  def evaluate(): Unit = {
    logger.info("Beginning evaluation")

    // Instantiates one downloader from its factory and runs it.
    def runBenchmark(benchmarkRunnerCreator: BenchmarkDatasetConfig => DatasetDownloader): Unit = {
      val benchmarkRunner = benchmarkRunnerCreator(config)
      val benchmarkName   = benchmarkRunner.benchmarkName
      logger.info(s"Running $benchmarkName")
      benchmarkRunner.run()
    }

    if (config.benchmark == AvailableBenchmarks.ALL) {
      benchmarkConstructors.values.foreach(runBenchmark)
    } else {
      benchmarkConstructors.get(config.benchmark) match {
        case Some(benchmarkRunnerCreator) => runBenchmark(benchmarkRunnerCreator)
        case None =>
          // Logged at error level so it is emitted even when only ERROR is enabled.
          logger.error(s"No dataset downloader is registered for benchmark '${config.benchmark}'")
      }
    }
  }
}

object BenchmarkDataset {

  /** Downloader factories keyed by benchmark; each factory receives the run configuration. */
  val benchmarkConstructors: Map[AvailableBenchmarks.Value, BenchmarkDatasetConfig => DatasetDownloader] = Map(
    // TODO: Add when implementing
    // (AvailableBenchmarks.OWASP_JAVASRC, x => new OWASPJavaDownloader(x.datasetDir)),
    // (AvailableBenchmarks.OWASP_JAVA, x => new OWASPJavaDownloader(x.datasetDir)),
    // (
    // AvailableBenchmarks.SECURIBENCH_MICRO_JAVASRC,
    // x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA_SRC)
    // ),
    // (AvailableBenchmarks.SECURIBENCH_MICRO_JAVA, x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA_BYTECODE)),
    AvailableBenchmarks.ICHNAEA_JSSRC -> ((cfg: BenchmarkDatasetConfig) => new IchnaeaDownloader(cfg.datasetDir))
  )

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package io.joern.benchmarks.datasets

import better.files.File

/** Settings for one run.
  *
  * @param benchmark
  *   the benchmark to process; defaults to [[AvailableBenchmarks.ALL]]
  * @param datasetDir
  *   the dataset directory handed to the downloaders; defaults to `workspace`
  */
case class BenchmarkDatasetConfig(
  benchmark: AvailableBenchmarks.Value = AvailableBenchmarks.ALL,
  datasetDir: File = File("workspace")
)

/** Identifiers of the known benchmarks, declared in id order starting at 0 with `ALL`. */
object AvailableBenchmarks extends Enumeration {
  val ALL, OWASP_JAVASRC, OWASP_JAVA, SECURIBENCH_MICRO_JAVASRC, SECURIBENCH_MICRO_JAVA, ICHNAEA_JSSRC = Value
}

/** The two Java input kinds: source code and bytecode. */
object JavaCpgTypes extends Enumeration {
  val JAVA_SRC, JAVA_BYTECODE = Value
}

/** Result output formats.
  *
  * NOTE(review): nothing in the visible sources refers to this enumeration — confirm it is still needed.
  */
object OutputFormat extends Enumeration {
  val JSON, CSV, MD = Value
}
Loading

0 comments on commit c50eb25

Please sign in to comment.