Skip to content

Commit

Permalink
Merge pull request #1688 from gchq/feature/BAI-1540-handle-modelscan-…
Browse files Browse the repository at this point in the history
…errors

Check for errors in ModelScan API result
  • Loading branch information
PE39806 authored Jan 7, 2025
2 parents f04dffc + 26269d8 commit dc56cb8
Show file tree
Hide file tree
Showing 15 changed files with 574 additions and 190 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/modelscan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ jobs:
run: |
cd lib/modelscan_api
python -m pytest
# Pytest -m integration
- name: Run integration testing
run: |
cd lib/modelscan_api
python -m pytest -m integration
25 changes: 13 additions & 12 deletions backend/src/clients/modelScan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@ interface ModelScanResponse {
skipped_files: string[]
}
}
issues: [
{
description: string
operator: string
module: string
source: string
scanner: string
severity: string
},
]
// TODO: currently unknown what this might look like
errors: object[]
issues: {
description: string
operator: string
module: string
source: string
scanner: string
severity: string
}[]
errors: {
category: string
description: string
source: string
}[]
}

export async function getModelScanInfo() {
Expand Down
16 changes: 16 additions & 0 deletions backend/src/connectors/fileScanning/modelScan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector {
try {
const scanResults = await scanStream(s3Stream, file.name, file.size)

if (scanResults.errors.length !== 0) {
log.error(
{ errors: scanResults.errors, modelId: file.modelId, fileId: file._id, name: file.name },
'Scan errored.',
)
return [
{
toolName: modelScanToolName,
state: ScanState.Error,
scannerVersion: modelscanVersion,
lastRunAt: new Date(),
},
]
}

const issues = scanResults.summary.total_issues
const isInfected = issues > 0
const viruses: string[] = []
Expand Down Expand Up @@ -91,6 +106,7 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector {
{
toolName: modelScanToolName,
state: ScanState.Error,
scannerVersion: modelscanVersion,
lastRunAt: new Date(),
},
]
Expand Down
11 changes: 10 additions & 1 deletion lib/modelscan_api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,21 @@ pre-commit install

### Tests

To run the tests:
To run the unit tests:

```bash
pytest
```

To run the integration tests (does not require any externally running services):

```bash
pytest -m integration
```

Note that the integration tests use safe but technically "malicious" file(s) to check ModelScan's performance. Please
refer to [test_integration](./tests/test_integration/README.md) for details.

### Running

To run in [dev mode](https://fastapi.tiangolo.com/fastapi-cli/#fastapi-dev):
Expand Down
19 changes: 15 additions & 4 deletions lib/modelscan_api/bailo_modelscan_api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import logging
import re
from pathlib import Path

logger = logging.getLogger(__name__)
Expand All @@ -21,7 +22,17 @@ def parse_path(path: str | Path | None) -> Path:
return Path().cwd() if path == "." else Path(path).absolute()


def safe_join(root_dir: str | Path | None, filename: str | Path) -> Path:
def sanitise_unix_filename(filename: str) -> str:
"""Safely convert an arbitrary string to a valid unix filename by only preserving explicitly allowed characters as per https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
Note that this is not safe for Windows users as it doesn't check for reserved words e.g. CON and AUX.
:param filename: the untrusted filename to be sanitised
:return: a valid filename with trusted characters
"""
return re.sub(r"[/\\?%*:|\"<>\x7F\x00-\x1F]", "-", filename)


def safe_join(root_dir: str | Path | None, filename: str) -> Path:
"""Combine a trusted directory path with an untrusted filename to get a full path.
:param root_dir: Trusted path/directory.
Expand All @@ -33,13 +44,13 @@ def safe_join(root_dir: str | Path | None, filename: str | Path) -> Path:
if not filename or not str(filename).strip():
raise ValueError("filename must not be empty")

stripped_filename = Path(str(filename)).name.strip()
safe_filename = sanitise_unix_filename(filename).strip()

if not stripped_filename:
if not safe_filename:
raise ValueError("filename must not be empty")

parent_dir = parse_path(root_dir).resolve()
full_path = parent_dir.joinpath(stripped_filename).resolve()
full_path = parent_dir.joinpath(safe_filename).resolve()
if not full_path.is_relative_to(parent_dir):
raise ValueError("Could not safely join paths.")

Expand Down
119 changes: 99 additions & 20 deletions lib/modelscan_api/bailo_modelscan_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,28 +87,107 @@ async def info(settings: Annotated[Settings, Depends(get_settings)]) -> ApiInfor
"description": "modelscan returned results",
"content": {
"application/json": {
"example": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 1, "HIGH": 0, "CRITICAL": 0},
"total_issues": 1,
"input_path": "/foo/bar/unsafe_model.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["unsafe_model.h5"]},
"skipped": {"total_skipped": 0, "skipped_files": []},
"examples": {
"Normal": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0},
"total_issues": 0,
"input_path": "/foo/bar/safe_model.pkl",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["safe_model.pkl"]},
"skipped": {
"total_skipped": 0,
"skipped_files": [],
},
},
"issues": [],
"errors": [],
}
},
"Issue": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 1, "HIGH": 0, "CRITICAL": 0},
"total_issues": 1,
"input_path": "/foo/bar/unsafe_model.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 1, "scanned_files": ["unsafe_model.h5"]},
"skipped": {"total_skipped": 0, "skipped_files": []},
},
"issues": [
{
"description": "Use of unsafe operator 'Lambda' from module 'Keras'",
"operator": "Lambda",
"module": "Keras",
"source": "unsafe_model.h5",
"scanner": "modelscan.scanners.H5LambdaDetectScan",
"severity": "MEDIUM",
}
],
"errors": [],
}
},
"issues": [
{
"description": "Use of unsafe operator 'Lambda' from module 'Keras'",
"operator": "Lambda",
"module": "Keras",
"source": "unsafe_model.h5",
"scanner": "modelscan.scanners.H5LambdaDetectScan",
"severity": "MEDIUM",
"Skipped": {
"value": {
"errors": [],
"issues": [],
"summary": {
"input_path": "/foo/bar/empty.txt",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"scanned": {"total_scanned": 0},
"skipped": {
"skipped_files": [
{
"category": "SCAN_NOT_SUPPORTED",
"description": "Model Scan did not scan file",
"source": "empty.txt",
}
],
"total_skipped": 1,
},
"timestamp": "2024-11-19T12:00:00.000000",
"total_issues": 0,
"total_issues_by_severity": {"CRITICAL": 0, "HIGH": 0, "LOW": 0, "MEDIUM": 0},
},
}
],
"errors": [],
},
"Error": {
"value": {
"summary": {
"total_issues_by_severity": {"LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0},
"total_issues": 0,
"input_path": "/foo/bar/null.h5",
"absolute_path": "/foo/bar",
"modelscan_version": "0.8.1",
"timestamp": "2024-11-19T12:00:00.000000",
"scanned": {"total_scanned": 0},
"skipped": {
"total_skipped": 1,
"skipped_files": [
{
"category": "SCAN_NOT_SUPPORTED",
"description": "Model Scan did not scan file",
"source": "null.h5",
}
],
},
},
"issues": [],
"errors": [
{
"category": "MODEL_SCAN",
"description": "Unable to synchronously open file (file signature not found)",
"source": "null.h5",
}
],
}
},
}
}
},
Expand Down
131 changes: 0 additions & 131 deletions lib/modelscan_api/bailo_modelscan_api/test_dependencies.py

This file was deleted.

Loading

0 comments on commit dc56cb8

Please sign in to comment.