Commit

update readme and add consistent naming convention
atharvakale343 committed Oct 24, 2024
1 parent dcb5121 commit 2a8b0c8
Showing 13 changed files with 67 additions and 189 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -1,7 +1,7 @@
examples/out/*
test.py
- examples/known_dataset_images
- examples/target_folder_images
+ examples/known_content_directory_images
+ examples/target_directory_images

# MacOS
.DS_Store
23 changes: 20 additions & 3 deletions .vscode/launch.json
@@ -15,9 +15,9 @@
"--output_sql",
"./examples/out/known_content_hashes.sqlite",
"--target_directory",
"./examples/target_folder",
"./examples/target_directory",
"--known_content_directory",
"./examples/known_dataset"
"./examples/known_content_directory"
],
"preLaunchTask": "remove-sqlite"
},
@@ -32,7 +32,7 @@
"--output_sql",
"./examples/out/known_content_images_hashes.sqlite",
"--known_content_directory",
"./examples/known_dataset_images"
"./examples/known_content_directory_images"
]
},
{
@@ -48,6 +48,23 @@
"request": "launch",
"module": "small_blk_forensics.backend.server",
"console": "integratedTerminal"
+ },
+ {
+ "name": "new cli",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "cmd_interface2.py",
+ "console": "integratedTerminal",
+ "args": [
+ "gen_hash",
+ "--known_content_directory",
+ "examples/known_content_directory",
+ "--output_sql_path",
+ "examples/out/known_content_hashes.sqlite",
+ "--test_directories",
+ "one",
+ "two"
+ ]
}
]
}
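For reference, the added "new cli" launch configuration corresponds roughly to the following shell invocation, a sketch assembled from the configuration's program and args; treating --test_directories as a single multi-value flag taking the placeholder values "one" and "two" is an assumption based on how they are listed in the config:

```zsh
python cmd_interface2.py gen_hash \
    --known_content_directory examples/known_content_directory \
    --output_sql_path examples/out/known_content_hashes.sqlite \
    --test_directories one two
```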
2 changes: 1 addition & 1 deletion Pipfile
@@ -7,7 +7,7 @@ name = "pypi"
pydantic = "~=2.9"
colorama = "0.4.6"
tqdm = "4.66.5"
flask-ml = "0.2.1"
flask-ml = "0.2.2"

[dev-packages]
black = "~=24.8"
8 changes: 4 additions & 4 deletions Pipfile.lock

Some generated files are not rendered by default.

10 changes: 6 additions & 4 deletions README.md
@@ -34,6 +34,8 @@ python -m small_blk_forensics.backend.server

### Client example

+ Pre-requisite: start the server in the background.
+
```zsh
python client_example.py
```
@@ -45,8 +47,8 @@ Run SBF on a known content directory and target directory
```zsh
python cmd_interface.py gen_hash_random \
--output_sql ./examples/out/known_content_hashes.sqlite \
- --target_directory ./examples/target_folder \
- --known_content_directory ./examples/known_dataset \
+ --target_directory ./examples/target_directory \
+ --known_content_directory ./examples/known_content_directory \
--block_size 4
```

@@ -55,7 +57,7 @@ Generate a SQLite DB contains hashes of all the blocks within a source directory
```zsh
python cmd_interface.py gen_hash \
--output_sql ./examples/out/known_content_hashes.sqlite \
- --known_content_directory ./examples/known_dataset \
+ --known_content_directory ./examples/known_content_directory \
--block_size 4
```

@@ -65,7 +67,7 @@ Run SBF on a pre-generated known content directory SQLite DB and target directory
```zsh
python cmd_interface.py hash_random \
--input_sql ./examples/out/known_content_hashes.sqlite \
- --target_directory ./examples/target_folder \
+ --target_directory ./examples/target_directory \
--block_size 4
```

4 changes: 2 additions & 2 deletions client_example.py
@@ -8,9 +8,9 @@

# The inputs to be sent to the server
inputs = {
"known_dataset": {"path": f'{root.joinpath("examples", "known_dataset")}'},
"known_content_directory": {"path": f'{root.joinpath("examples", "known_content_directory")}'},
"output_sql_path": {"path": f'{root.joinpath("examples", "out", "known_content_hashes.sqlite")}'},
"target_folder": {"path": f'{root.joinpath("examples", "target_folder")}'},
"target_directory": {"path": f'{root.joinpath("examples", "target_directory")}'},
}

# Parameters of the model
149 changes: 4 additions & 145 deletions cmd_interface.py
@@ -1,157 +1,16 @@
# Command line interface for small block forensics

import argparse
- from pathlib import Path

- from small_blk_forensics.ml.model import SmallBlockForensicsModel
- from small_blk_forensics.utils.common import dir_path_arg_parser, file_path_arg_parser
- from small_blk_forensics.utils.data import MyModelResponse
+ from flask_ml.flask_ml_cli import MLCli


- def _print_results(result: MyModelResponse, args: argparse.Namespace):
- print()
- print("Results:")
- print(f"\tMatch: {'Yes' if result.found else 'No'}")
- print(f"\tTarget Probability: {args.target_probability}")
- print(f"\tBlock Size: {args.block_size}")
- if result.found:
- print(f"\tMatched Target File: {result.target_file}")
- print(f"\tMatch Known Dataset File: {result.known_dataset_file}")
- print(f"\tBlock Num in Known Dataset File: {result.block_num_in_known_dataset}")
- print(f"\tBlock Num in Target File: {result.block_num_in_target}")
-
-
- def combined_parser_func(args: argparse.Namespace):
- output_sql_file = Path(args.output_sql)
-
- model = SmallBlockForensicsModel(args.block_size, args.target_probability)
- result = model.run_with_known_content_directory(
- Path(args.known_content_directory), Path(args.target_directory), output_sql_file
- )
-
- _print_results(result, args)
-
-
- def gen_hash_parser_func(args: argparse.Namespace):
- output_sql = Path(args.output_sql)
-
- model = SmallBlockForensicsModel(args.block_size)
- model.hash_directory(Path(args.known_content_directory), output_sql)
- print()
-
-
- def hash_random_parser_func(args: argparse.Namespace):
- model = SmallBlockForensicsModel(args.block_size, args.target_probability)
- result = model.run_with_known_content_sqlite(Path(args.input_sql), Path(args.target_directory))
-
- _print_results(result, args)
+ from small_blk_forensics.backend.server import server


def main():
parser = argparse.ArgumentParser(description="Analyze target directories with small block forensics")
- subparsers = parser.add_subparsers(help="Subcommands", required=True)
-
- # Combined Parser
- combined_parser = subparsers.add_parser(
- "gen_hash_random",
- help="Hash random blocks of a target directory and check against hashes of blocks contained within a source directory",
- )
- combined_parser.set_defaults(func=combined_parser_func)
- combined_parser.add_argument(
- "--target_directory",
- type=dir_path_arg_parser,
- help="The path to the directory containing files/folders of the content to analyze",
- required=True,
- )
- combined_parser.add_argument(
- "--output_sql",
- type=str,
- help="The path to save the SQLite table for known_content",
- required=True,
- )
- combined_parser.add_argument(
- "--known_content_directory",
- type=dir_path_arg_parser,
- help="The path to the directory containing the files/folders of known content",
- default=None,
- )
- combined_parser.add_argument(
- "--target_probability",
- type=float,
- help="The target probability to achieve. Higher means more of the target drive will be scanned. Defaults to 0.95",
- default=0.95,
- required=False,
- )
- combined_parser.add_argument(
- "--block_size",
- type=int,
- help="The block size in bytes to be used. Defaults to 4096.",
- default=4096,
- required=False,
- )
-
- # Generate hashes parser
- gen_hash_parser = subparsers.add_parser(
- "gen_hash",
- help="Generate a SQLite DB contains hashes of all the blocks within a source directory",
- )
- gen_hash_parser.set_defaults(func=gen_hash_parser_func)
- gen_hash_parser.add_argument(
- "--output_sql",
- type=str,
- help="The path to save the SQLite table for known_content",
- required=True,
- )
- gen_hash_parser.add_argument(
- "--known_content_directory",
- type=dir_path_arg_parser,
- help="The path to the directory containing the files/folders of known content",
- default=None,
- )
- gen_hash_parser.add_argument(
- "--block_size",
- type=int,
- help="The block size in bytes to be used. Defaults to 4096.",
- default=4096,
- required=False,
- )
-
- # Hash Random blocks parser
- hash_random_parser = subparsers.add_parser(
- "hash_random",
- help="Hash random blocks of a target directory and check against hashes contained within an SQLite DB",
- )
- hash_random_parser.set_defaults(func=hash_random_parser_func)
- hash_random_parser.add_argument(
- "--input_sql",
- type=file_path_arg_parser,
- help="The path to the existing SQLite DB containing hashes of known content",
- default=None,
- )
- hash_random_parser.add_argument(
- "--target_directory",
- type=dir_path_arg_parser,
- help="The path to the directory containing files/folders of the content to analyze",
- required=True,
- )
- hash_random_parser.add_argument(
- "--target_probability",
- type=float,
- help="The target probability to achieve. Higher means more of the target drive will be scanned. Defaults to 0.95",
- default=0.95,
- required=False,
- )
- hash_random_parser.add_argument(
- "--block_size",
- type=int,
- help="The block size in bytes to be used. Defaults to 4096.",
- default=4096,
- required=False,
- )
- args = parser.parse_args()
- if args.func:
- print()
- args.func(args)
+ cli = MLCli(server, parser)
+ cli.run_cli()


if __name__ == "__main__":
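Read together, the lines kept and added above reduce cmd_interface.py to a thin wrapper around flask-ml's CLI support. A reconstruction of the post-commit file is sketched below; it is assembled from the visible diff, not copied verbatim, and the call under the `__main__` guard is assumed to be `main()` since that line is collapsed in the rendered diff:

```python
# Command line interface for small block forensics

import argparse

from flask_ml.flask_ml_cli import MLCli

from small_blk_forensics.backend.server import server


def main():
    parser = argparse.ArgumentParser(description="Analyze target directories with small block forensics")
    # MLCli drives the command line from the flask-ml server defined in
    # small_blk_forensics.backend.server, replacing the hand-written
    # argparse subparsers deleted above.
    cli = MLCli(server, parser)
    cli.run_cli()


if __name__ == "__main__":
    main()  # assumed; this line is collapsed in the rendered diff
```

The commands in test/test.list below change only in their directory names, so the gen_hash_random, gen_hash, and hash_random subcommands evidently remain available through the new entry point.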
File renamed without changes.
File renamed without changes.
18 changes: 9 additions & 9 deletions small_blk_forensics/backend/server.py
@@ -108,13 +108,13 @@ def task_schema_func_known_directory():
return TaskSchema(
inputs=[
InputSchema(
key="target_folder",
key="target_directory",
label="Target Directory",
subtitle="The directory containing files/folders of the content to analyze",
input_type=InputType.DIRECTORY,
),
InputSchema(
key="known_dataset",
key="known_content_directory",
label="Known Content Directory",
subtitle="The directory containing the files/folders of known content",
input_type=InputType.DIRECTORY,
@@ -150,8 +150,8 @@ def task_schema_func_known_directory():


class InputsKnownContentDirectory(TypedDict):
- target_folder: DirectoryInput
- known_dataset: DirectoryInput
+ target_directory: DirectoryInput
+ known_content_directory: DirectoryInput
output_sql_path: FileInput


@@ -165,8 +165,8 @@ def execute(inputs: InputsKnownContentDirectory, parameters: Parameters):
try:
return _execute_throws(
parameters,
inputs["target_folder"].path,
inputs["known_dataset"].path,
inputs["target_directory"].path,
inputs["known_content_directory"].path,
None,
inputs["output_sql_path"].path,
)
@@ -180,7 +180,7 @@ def task_schema_func_known_sql():
return TaskSchema(
inputs=[
InputSchema(
key="target_folder",
key="target_directory",
label="Target Directory",
subtitle="The directory containing files/folders of the content to analyze",
input_type=InputType.DIRECTORY,
@@ -214,7 +214,7 @@ def task_schema_func_known_sql():


class InputsKnownContentSql(TypedDict):
- target_folder: DirectoryInput
+ target_directory: DirectoryInput
input_sql: FileInput


@@ -226,7 +226,7 @@ class InputsKnownContentSql(TypedDict):
)
def execute_sql(inputs: InputsKnownContentSql, parameters: Parameters):
try:
return _execute_throws(parameters, inputs["target_folder"].path, None, inputs["input_sql"].path, None)
return _execute_throws(parameters, inputs["target_directory"].path, None, inputs["input_sql"].path, None)
except Exception as e:
logger.error("An error occurred while executing the model")
logger.error(e)
6 changes: 3 additions & 3 deletions test/test.list
@@ -1,8 +1,8 @@
# Run SBF on a known content directory and target directory
- python cmd_interface.py gen_hash_random --output_sql ./examples/out/known_content_hashes.sqlite --target_directory ./examples/target_folder --known_content_directory ./examples/known_dataset --block_size 4 | head -n -2
+ python cmd_interface.py gen_hash_random --output_sql ./examples/out/known_content_hashes.sqlite --target_directory ./examples/target_directory --known_content_directory ./examples/known_content_directory --block_size 4 | head -n -2

# Generate a SQLite DB contains hashes of all the blocks within a source directory
- python cmd_interface.py gen_hash --output_sql ./examples/out/known_content_hashes.sqlite --known_content_directory ./examples/known_dataset --block_size 4
+ python cmd_interface.py gen_hash --output_sql ./examples/out/known_content_hashes.sqlite --known_content_directory ./examples/known_content_directory --block_size 4

# Run SBF on a pre-generated known content directory SQLite DB and target directory
- python cmd_interface.py hash_random --input_sql ./examples/out/known_content_hashes.sqlite --target_directory ./examples/target_folder --block_size 4 | head -n -2
+ python cmd_interface.py hash_random --input_sql ./examples/out/known_content_hashes.sqlite --target_directory ./examples/target_directory --block_size 4 | head -n -2
(Diffs for the remaining changed files were not loaded on this page.)
