-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from joernio/andrei/ichnaea-downloader
Dataset Downloader - Ichnaea
- Loading branch information
Showing
26 changed files
with
931 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
cpg.bin | ||
target/ | ||
.idea/ | ||
/.bsp | ||
/joern-inst | ||
/workspace | ||
/results | ||
/bin | ||
.local |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
version = 3.5.1 | ||
runner.dialect = scala3 | ||
preset = IntelliJ | ||
maxColumn = 120 | ||
align.preset = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Build definition of the benchmark dataset downloader.
name                     := "joern-benchmarks-datasets-datasets"
ThisBuild / organization := "io.joern"
ThisBuild / scalaVersion := "3.4.1"

// parsed by project/Versions.scala, updated by updateDependencies.sh
// Keep the `val <name> = "<version>"` layout: it is matched textually by other tooling.
val cpgVersion = "1.6.11"
val joernVersion = "2.0.348"
val overflowdbVersion = "1.192"

// All joern artifacts are pinned to the same joern release.
val joernModules = Seq("joern-cli", "x2cpg").map(module => "io.joern" %% module % Versions.joern)

libraryDependencies ++= Seq(
  "com.github.pathikrit"    %% "better-files"      % Versions.betterFiles,
  "com.github.scopt"        %% "scopt"             % Versions.scopt,
  "org.apache.logging.log4j" % "log4j-slf4j2-impl" % Versions.log4j % Optional,
  "com.lihaoyi"             %% "requests"          % Versions.requests,
  "com.lihaoyi"             %% "upickle"           % Versions.upickle
) ++ joernModules

// mostly so that `sbt assembly` works, but also to ensure that we don't end up
// with unexpected shadowing in jar hell
excludeDependencies ++= Seq(ExclusionRule("io.shiftleft", "codepropertygraph-domain-classes_3"))

// Entries that may be contributed by several jars: keep the first one seen,
// and defer to the build-wide strategy for everything else.
assembly / assemblyMergeStrategy := {
  case "log4j2.xml" | "module-info.class" | "META-INF/versions/9/module-info.class" |
      "io/github/retronym/java9rtexport/Export.class" =>
    MergeStrategy.first
  case PathList("scala", "collection", "internal", "pprint", _) =>
    MergeStrategy.first
  case other =>
    val fallback = (ThisBuild / assemblyMergeStrategy).value
    fallback(other)
}

ThisBuild / Compile / scalacOptions ++= Seq("-feature", "-deprecation", "-language:implicitConversions")

enablePlugins(JavaAppPackaging)

ThisBuild / licenses := List("Apache-2.0" -> url("http://www.apache.org/licenses/LICENSE-2.0"))

Global / onChangedBuildSource := ReloadOnSourceChanges

ThisBuild / resolvers ++= Seq(
  Resolver.mavenLocal,
  "Sonatype OSS" at "https://oss.sonatype.org/content/repositories/public",
  "Atlassian" at "https://packages.atlassian.com/mvn/maven-atlassian-external",
  "Gradle Releases" at "https://repo.gradle.org/gradle/libs-releases/"
)

// No API docs are generated or published for this project.
Compile / doc / sources                := Seq.empty
Compile / packageDoc / publishArtifact := false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/env bash
set -o errexit
set -o pipefail
set -o nounset

# get script location, use as a root dir for this script
if [ "$(uname)" = 'Darwin' ]; then
  # https://unix.stackexchange.com/a/96238
  if [ "${BASH_SOURCE:-x}" != 'x' ]; then
    this_script=$BASH_SOURCE
  elif [ "${ZSH_VERSION:-x}" != 'x' ]; then
    setopt function_argzero
    this_script=$0
  elif eval '[[ -n ${.sh.file} ]]' 2>/dev/null; then
    eval 'this_script=${.sh.file}'
  else
    echo 1>&2 "Unsupported shell. Please use bash, ksh93 or zsh."
    exit 2
  fi
  relative_directory=$(dirname "$this_script")
  SCRIPT_ABS_DIR=$(cd "$relative_directory" && pwd)
else
  SCRIPT_ABS_PATH=$(readlink -f "$0")
  SCRIPT_ABS_DIR=$(dirname "$SCRIPT_ABS_PATH")
fi

# extract joern_version from build.sbt - parsing just like in project/Versions.scala
# The build file is looked up next to this script so the result does not depend on the
# caller's working directory. Assignment and `readonly` are separate statements because
# `readonly VAR=$(...)` would hide a failing pipeline behind the exit status of `readonly`.
JOERN_VERSION=$(grep 'val joernVersion = "' "$SCRIPT_ABS_DIR/build.sbt" | sed 's/.*"\(.*\)"/\1/' || true)
if [ -z "$JOERN_VERSION" ]; then
  echo 1>&2 "Unable to extract joernVersion from $SCRIPT_ABS_DIR/build.sbt"
  exit 1
fi
readonly JOERN_VERSION
|
||
# Check required tools are installed.
# Arguments: $1 - name of the command that must be resolvable by the shell.
# Exits the script with status 1 (message on stderr) when the command is missing.
check_installed() {
  # `command -v` is the POSIX way to probe for a command; its own output is discarded
  # so that only our message is shown.
  if ! command -v "${1:-}" > /dev/null 2>&1; then
    echo 1>&2 "Please ensure you have ${1:-<unspecified tool>} installed."
    exit 1
  fi
}
|
||
# Directory (next to this script) that holds the local joern installation.
readonly JOERN_INSTALL="$SCRIPT_ABS_DIR/joern-inst"

# Offer to remove a previous installation; anything other than y/Y keeps it.
if [ -d "${JOERN_INSTALL}" ]; then
  echo "found existing local joern installation in $JOERN_INSTALL"
  echo "should we wipe it and start fresh? [y/N]"
  # `read` returns non-zero on EOF (e.g. stdin is not a terminal); without the `|| true`
  # errexit would abort the whole script instead of falling back to the default "N".
  read -r ANSWER || true
  # Quoted match: an answer containing spaces must not break the comparison.
  case "${ANSWER:-}" in
    y | Y) rm -rf "$JOERN_INSTALL" ;;
    *) ;;
  esac
fi
|
||
# Download the joern installer for the pinned version and run it, unless an
# installation directory is already present.
if [ ! -d "${JOERN_INSTALL}" ]; then
  echo "downloading and installing joern $JOERN_VERSION..."
  check_installed "curl"

  installer_url="https://github.com/ShiftLeftSecurity/joern/releases/download/v$JOERN_VERSION/joern-install.sh"
  installer_path="$SCRIPT_ABS_DIR/joern-install.sh"

  # Fetch installer
  echo "$installer_url"
  # --fail: abort on HTTP errors instead of saving the error page and executing it below.
  curl --fail -L "$installer_url" -o "$installer_path"

  # Install into `joern-inst`
  chmod +x "$installer_path"
  "$installer_path" --install-dir="$JOERN_INSTALL" --version="v$JOERN_VERSION" --without-plugins
  rm "$installer_path"
  # NOTE(review): presumably the installer leaves joern-cli.zip in the current directory;
  # `-f` keeps the script going when it does not.
  rm -f joern-cli.zip
fi
|
||
# Library directory of the local joern installation (note the trailing slash).
readonly JAR_INSTALL_DIR="${JOERN_INSTALL}/joern-cli/lib/"

echo "Building extension"
sbt clean stage

echo "Installing jars into: ${JAR_INSTALL_DIR}"
# `-f`: when an existing installation is reused the jar may already have been removed by a
# previous run; a plain `rm` would then fail and errexit would abort the script.
rm -f "${JAR_INSTALL_DIR}"io.shiftleft.codepropertygraph-domain-classes*
# NOTE(review): build.sbt sets the organization to `io.joern`, so these `org.codeminers.*`
# globs look like template leftovers and may match nothing (which makes `cp` fail) -
# confirm the names of the staged jars.
cp target/universal/stage/lib/org.codeminers.standalone-* "${JAR_INSTALL_DIR}"
cp target/universal/stage/lib/org.codeminers.*domain* "${JAR_INSTALL_DIR}"

echo "All done, you're ready to go in $JOERN_INSTALL"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
joern-inst/joern-cli/joern |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
target/universal/stage/bin/main |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<Configuration status="WARN"> | ||
<Appenders> | ||
<Console name="Console" target="SYSTEM_ERR"> | ||
<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %c{0}: %msg%n"/> | ||
</Console> | ||
</Appenders> | ||
<Loggers> | ||
<Logger name="io.shiftleft.overflowdb" level="warn" /> | ||
<Root level="ERROR"> | ||
<AppenderRef ref="Console" /> | ||
</Root> | ||
</Loggers> | ||
</Configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import java.io.File | ||
import java.net.URI | ||
import java.nio.file.{Files, Path, Paths} | ||
|
||
/** Build-time helper that downloads remote files and remembers which url each local file came from. */
object DownloadHelper {

  /** Directory (relative to the working directory) holding one record file per downloaded file. */
  val LocalStorageDir: Path = Paths.get(".local/source-urls")

  /** Downloads the remote file from the given url if either
    *   - the localFile is not available,
    *   - or the url is different from the previously downloaded file
    *   - or we don't have the original url from the previously downloaded file
    *
    * We store the information about the previously downloaded urls and the localFile in `.local`.
    *
    * @param url
    *   location of the remote file
    * @param localFile
    *   destination on disk; an existing file is deleted before the download starts
    */
  def ensureIsAvailable(url: String, localFile: File): Unit = {
    if (!localFile.exists() || Option(url) != previousUrlForLocalFile(localFile)) {
      val localPath   = localFile.toPath
      val storageFile = storageInfoFileFor(localFile)

      Files.deleteIfExists(localPath)
      // Forget the previous url *before* downloading: if the transfer fails half-way, a stale
      // record for the same url would otherwise make the truncated file look up to date.
      Files.deleteIfExists(storageFile)

      println(s"[INFO] downloading $url to $localFile")
      sbt.io.Using.urlInputStream(new URI(url).toURL) { inputStream =>
        sbt.IO.transfer(inputStream, localFile)
      }

      // persist url in local storage (only reached when the download succeeded)
      Files.createDirectories(storageFile.getParent)
      Files.writeString(storageFile, url)
    }
  }

  /** Path of `path` relative to the current working directory. Both sides are normalized so that
    * `.`/`..` segments in `path` do not leak into the result.
    */
  private def relativePathToProjectRoot(path: Path): String =
    Paths
      .get("")
      .toAbsolutePath
      .normalize()
      .relativize(path.toAbsolutePath.normalize())
      .toString

  /** The url recorded for `localFile` by a previous download, if a non-empty record exists. */
  private def previousUrlForLocalFile(localFile: File): Option[String] = {
    Option(storageInfoFileFor(localFile))
      .filter(Files.exists(_))
      .map(recordFile => Files.readString(recordFile))
      .filter(_.nonEmpty)
  }

  /** Location of the record file that stores the source url of `localFile`. */
  private def storageInfoFileFor(localFile: File): Path =
    LocalStorageDir.resolve(relativePathToProjectRoot(localFile.toPath))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import sbt.* | ||
|
||
/** Handles for the sbt sub-projects, each based in the directory passed to `file`. */
object Projects {
  lazy val schema         = project in file("schema")
  lazy val domainClasses  = project in file("domain-classes")
  lazy val schemaExtender = project in file("schema-extender")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/** Central place for dependency versions used by the build. */
object Versions {
  // Versions that are declared as `val <key> = "<version>"` in build.sbt.
  val cpg        = parseVersion("cpgVersion")
  val joern      = parseVersion("joernVersion")
  val overflowdb = parseVersion("overflowdbVersion")

  val betterFiles = "3.9.2"
  val log4j       = "2.20.0"
  val requests    = "0.8.0"
  val scopt       = "4.1.0"
  val upickle     = "3.3.0"

  val jsAstGen = "3.14.0"

  /** Extracts the version assigned to `val <key>` in `build.sbt` (read from the working directory).
    *
    * Any amount of spaces around `=` and trailing text after the closing quote (e.g. a comment) are tolerated.
    *
    * @param key
    *   name of the `val` in build.sbt
    * @return
    *   the quoted version string
    * @throws java.lang.AssertionError
    *   if build.sbt does not contain exactly one matching line
    */
  private def parseVersion(key: String): String = {
    val versionRegexp = s""".*val $key[ ]*=[ ]*"(.*?)".*""".r
    val buildFile     = scala.io.Source.fromFile("build.sbt")
    // Materialize the matches before closing: `getLines()` is lazy and the file handle
    // would otherwise stay open for the lifetime of the build.
    val versions: List[String] =
      try buildFile.getLines().collect { case versionRegexp(version) => version }.toList
      finally buildFile.close()
    assert(
      versions.size == 1,
      s"""unable to extract $key from build.sbt, expected exactly one line like `val $key = "0.0.0-SNAPSHOT"`."""
    )
    versions.head
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
sbt.version=1.9.9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
libraryDependencies ++= Seq( | ||
"com.github.pathikrit" %% "better-files" % "3.9.2", | ||
) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3") | ||
addSbtPlugin("com.github.sbt" % "sbt-findbugs" % "2.0.0") | ||
addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1") | ||
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.7") | ||
addSbtPlugin("io.shiftleft" % "sbt-overflowdb" % "2.104") | ||
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
target/universal/stage/bin/repl-main |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<Configuration status="INFO"> | ||
<Properties> | ||
<Property name="pattern">%d{HH:mm:ss} [%level{WARN=*, DEBUG=#, ERROR=!, TRACE=%, INFO=+}] %msg%n</Property> | ||
</Properties> | ||
<Appenders> | ||
<Console name="stdout" target="SYSTEM_OUT"> <!-- 1 --> | ||
<PatternLayout pattern="${pattern}"/> | ||
<ThresholdFilter level="ERROR" onMatch="DENY" onMismatch="ACCEPT"/> <!-- 3 --> | ||
</Console> | ||
<Console name="stderr" target="SYSTEM_ERR"> <!-- 2 --> | ||
<PatternLayout pattern="${pattern}"/> | ||
<ThresholdFilter level="ERROR" onMatch="ACCEPT" onMismatch="DENY"/> <!-- 3 --> | ||
</Console> | ||
</Appenders> | ||
<Loggers> | ||
<Logger name="io.shiftleft.overflowdb" level="warn" /> | ||
<Logger name="io.joern.benchmarks.dataflowengineoss" level="info" /> | ||
<Root level="ERROR"> | ||
<AppenderRef ref="stdout" /> | ||
<AppenderRef ref="stderr" /> | ||
</Root> | ||
</Loggers> | ||
</Configuration> |
54 changes: 54 additions & 0 deletions
54
src/main/scala/io/joern/benchmarks/datasets/BenchmarkDataset.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package io.joern.benchmarks.datasets | ||
|
||
import io.joern.benchmarks.datasets.BenchmarkDataset.benchmarkConstructors | ||
import io.joern.benchmarks.datasets.AvailableBenchmarks | ||
import io.joern.benchmarks.datasets.runner.{ | ||
DatasetDownloader, | ||
IchnaeaDownloader | ||
|
||
// TODO: Add when implementing | ||
|
||
// OWASPJavaDownloader, | ||
// SecuribenchMicroDownloader | ||
} | ||
import org.slf4j.LoggerFactory | ||
import upickle.default.* | ||
|
||
/** Runs the dataset downloader(s) selected by the given configuration.
  *
  * @param config
  *   selects the benchmark (or [[AvailableBenchmarks.ALL]]) and is handed to each downloader constructor
  */
class BenchmarkDataset(config: BenchmarkDatasetConfig) {
  private val logger = LoggerFactory.getLogger(getClass)

  /** Runs every registered downloader when `config.benchmark` is `ALL`, otherwise only the one registered for the
    * selected benchmark. Selecting a benchmark without a registered downloader is reported as an error.
    */
  def evaluate(): Unit = {
    logger.info("Beginning evaluation")

    def runBenchmark(benchmarkRunnerCreator: BenchmarkDatasetConfig => DatasetDownloader): Unit = {
      val benchmarkRunner = benchmarkRunnerCreator(config)
      val benchmarkName   = benchmarkRunner.benchmarkName
      logger.info(s"Running $benchmarkName")
      benchmarkRunner.run()
    }

    if (config.benchmark == AvailableBenchmarks.ALL) {
      benchmarkConstructors.values.foreach(runBenchmark)
    } else {
      benchmarkConstructors.get(config.benchmark) match {
        case Some(benchmarkRunnerCreator) => runBenchmark(benchmarkRunnerCreator)
        case None                         =>
          // Previously a silent no-op: not every value of AvailableBenchmarks has a constructor registered.
          logger.error(s"No dataset downloader is registered for benchmark '${config.benchmark}', nothing was done")
      }
    }
  }
}
|
||
/** Registry of the dataset downloaders that are currently implemented. */
object BenchmarkDataset {

  /** Maps a benchmark to the function that builds its downloader from the configuration. */
  val benchmarkConstructors: Map[AvailableBenchmarks.Value, BenchmarkDatasetConfig => DatasetDownloader] = Map(
    // TODO: Add when implementing
    //    (AvailableBenchmarks.OWASP_JAVASRC, x => new OWASPJavaDownloader(x.datasetDir)),
    //    (AvailableBenchmarks.OWASP_JAVA, x => new OWASPJavaDownloader(x.datasetDir)),
    //    (
    //      AvailableBenchmarks.SECURIBENCH_MICRO_JAVASRC,
    //      x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA_SRC)
    //    ),
    //    (
    //      AvailableBenchmarks.SECURIBENCH_MICRO_JAVA,
    //      x => new SecuribenchMicroDownloader(x.datasetDir, JavaCpgTypes.JAVA_BYTECODE)
    //    ),
    AvailableBenchmarks.ICHNAEA_JSSRC -> ((cfg: BenchmarkDatasetConfig) => new IchnaeaDownloader(cfg.datasetDir))
  )

}
28 changes: 28 additions & 0 deletions
28
src/main/scala/io/joern/benchmarks/datasets/BenchmarkDatasetConfig.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package io.joern.benchmarks.datasets | ||
|
||
import better.files.File | ||
|
||
/** Settings for a dataset download run.
  *
  * @param benchmark
  *   the benchmark to process; defaults to [[AvailableBenchmarks.ALL]]
  * @param datasetDir
  *   directory handed to the downloaders; defaults to `workspace`
  */
case class BenchmarkDatasetConfig(
  benchmark: AvailableBenchmarks.Value = AvailableBenchmarks.ALL,
  datasetDir: File = File("workspace")
)
|
||
/** Identifiers of the supported benchmarks; `ALL` stands for every one of them. */
object AvailableBenchmarks extends Enumeration {
  // Declaration order determines the ids (0 to 5).
  val ALL, OWASP_JAVASRC, OWASP_JAVA, SECURIBENCH_MICRO_JAVASRC, SECURIBENCH_MICRO_JAVA, ICHNAEA_JSSRC = Value
}
|
||
/** Java input kinds: source code or bytecode. */
object JavaCpgTypes extends Enumeration {
  val JAVA_SRC, JAVA_BYTECODE = Value
}
|
||
/** Supported output formats. */
object OutputFormat extends Enumeration {
  val JSON, CSV, MD = Value
}
Oops, something went wrong.