-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add test data generation tool. (#217)
Added a tool to populate AIPscan with randomly generated example data.
- Loading branch information
Showing
9 changed files
with
338 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
-r base.txt | ||
|
||
faker==14.2.1 | ||
flake8==5.0.4 | ||
pytest==6.2.5 | ||
pytest_cov==2.11.1 | ||
|
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import os | ||
import sys | ||
|
||
# Make the AIPscan package importable: it lives three directory levels
# above this file, in a directory named "AIPscan".
relpath = os.path.dirname(__file__) + "/../../../AIPscan"
sys.path.append(os.path.abspath(relpath))

# Name of the configuration the tool scripts should load.
config_name = "default"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#!/usr/bin/env python3 | ||
import sys | ||
|
||
import click | ||
from app import init | ||
from faker import Faker | ||
from flask import Flask | ||
from helpers import data | ||
|
||
from AIPscan import db | ||
from AIPscan.models import FetchJob | ||
from config import CONFIGS | ||
|
||
|
||
@click.command()
@click.option("--storage-services-to-create", default=2)
@click.option("--locations-per-storage-service", default=2)
@click.option("--locations-min-aip-count", default=10)
@click.option("--locations-max-aip-count", default=30)
@click.option("--aip-min-file-count", default=10)
@click.option("--aip-max-file-count", default=30)
def main(
    storage_services_to_create,
    locations_per_storage_service,
    locations_min_aip_count,
    locations_max_aip_count,
    aip_min_file_count,
    aip_max_file_count,
):
    """Populate AIPscan with randomly generated example data.

    Creates one pipeline, then the requested number of storage services
    (each with its own fetch job), the requested number of locations per
    storage service, and a random number of AIPs (with files) per location.
    """
    # Initialize Flask app context
    app = Flask(__name__)
    app.config.from_object(CONFIGS[init.config_name])

    db.init_app(app)

    # Seeded so the AIP counts drawn below are reproducible between runs.
    fake = Faker()
    fake.seed_instance(0)
    randint = fake.random.randint

    with app.app_context():
        # Add example storage services
        print(f"Creating pipeline and {storage_services_to_create} storage services...")
        pipeline = data.create_fake_pipeline()

        ss_ids = []
        fetch_jobs = {}  # storage service ID -> fetch job ID

        for _ in range(storage_services_to_create):
            # Only the first storage service created is flagged as default.
            is_default = not ss_ids

            ss = data.create_fake_storage_service(is_default)
            ss_ids.append(ss.id)

            fetch_job = data.create_fake_fetch_job(ss.id)
            fetch_jobs[ss.id] = fetch_job.id

        # Populate storage service locations
        ss_locations_to_create = (
            storage_services_to_create * locations_per_storage_service
        )

        print(
            f"Creating {ss_locations_to_create} storage service locations (and their AIPs)..."
        )

        aip_batches_created = 0
        # One batch of AIPs is created per location.
        total_aip_batches = ss_locations_to_create
        for ss_id in ss_ids:
            for _ in range(locations_per_storage_service):
                # Add location
                sl = data.create_fake_location(ss_id)

                # Add AIPs
                aip_batches_created += 1

                print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")

                # Draw the count once and create exactly that many AIPs, so
                # the configured minimum and maximum are both honoured.
                aipcount = randint(locations_min_aip_count, locations_max_aip_count)
                for _ in range(aipcount):
                    # Use the fetch job of the storage service currently being
                    # populated (ss_id), not the last one created above.
                    aip = data.create_fake_aip(
                        pipeline.id, ss_id, sl.id, fetch_jobs[ss_id]
                    )
                    data.create_fake_aip_files(
                        aip_min_file_count, aip_max_file_count, aip.id
                    )

                # Update package/AIP counts in fetch job
                fetch_job = FetchJob.query.get(fetch_jobs[ss_id])
                fetch_job.total_packages += aipcount
                fetch_job.total_aips += aipcount
                db.session.commit()

        print("Done.")
|
||
|
||
if __name__ == "__main__":
    # Run the command and pass whatever it returns on as the exit status.
    exit_status = main()
    sys.exit(exit_status)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import os | ||
from datetime import date | ||
|
||
from faker import Faker | ||
|
||
from AIPscan import db | ||
from AIPscan.models import ( | ||
AIP, | ||
FetchJob, | ||
File, | ||
Pipeline, | ||
StorageLocation, | ||
StorageService, | ||
) | ||
|
||
# Module-wide Faker instance shared by every helper below.
fake = Faker()
# Shortcut to the random integer generator of that Faker instance.
randint = fake.random.randint
|
||
|
||
def create_fake_pipeline():
    """Create a Pipeline with a fake UUID and URL, commit it and return it."""
    origin_uuid = fake.uuid4()
    dashboard = fake.url()
    new_pipeline = Pipeline(origin_pipeline=origin_uuid, dashboard_url=dashboard)

    session = db.session
    session.add(new_pipeline)
    session.commit()

    return new_pipeline
|
||
|
||
def create_fake_storage_service(default):
    """Create a StorageService with fake details, commit it and return it.

    ``default`` is stored as-is in the record's ``default`` field; the
    download limit and offset are both set to 0.
    """
    # The last character of the generated text is dropped (presumably the
    # trailing period Faker appends - confirm against Faker's output).
    service_name = fake.text(20)[:-1]
    service_url = fake.url()
    username = fake.profile()["username"]
    key = fake.password()

    storage_service = StorageService(
        name=service_name,
        url=service_url,
        user_name=username,
        api_key=key,
        download_limit=0,
        download_offset=0,
        default=default,
    )

    session = db.session
    session.add(storage_service)
    session.commit()

    return storage_service
|
||
|
||
def create_fake_fetch_job(storage_service_id):
    """Create a FetchJob for the given storage service, commit it and return it.

    All counters start at 0, both download dates are today, and the download
    directory is a fake file path.
    """
    started = date.today()
    ended = date.today()
    directory = fake.file_path()

    job = FetchJob(
        total_packages=0,
        total_aips=0,
        total_deleted_aips=0,
        download_start=started,
        download_end=ended,
        download_directory=directory,
        storage_service_id=storage_service_id,
    )

    # These counters are assigned after construction rather than passed
    # to the constructor.
    for counter in ("total_dips", "total_sips", "total_replicas"):
        setattr(job, counter, 0)

    session = db.session
    session.add(job)
    session.commit()

    return job
|
||
|
||
def create_fake_location(storage_service_id):
    """Create a StorageLocation for a storage service, commit it and return it.

    The location path is the directory part of a fake file path (generated
    with ``file_path(3)``) joined with a fake UUID.
    """
    parent_dir = os.path.dirname(fake.file_path(3))
    location_path = os.path.join(parent_dir, fake.uuid4())

    # The last character of the generated description text is dropped.
    description_text = fake.text(20)[:-1]

    new_location = StorageLocation(
        current_location=location_path,
        description=description_text,
        storage_service_id=storage_service_id,
    )

    session = db.session
    session.add(new_location)
    session.commit()

    return new_location
|
||
|
||
def create_fake_aip(pipeline_id, storage_service_id, storage_location_id, fetch_job_id):
    """Create an AIP linked to the given records, commit it and return it.

    The UUID, transfer name, METS checksum and size are fake values; the
    creation date is today.
    """
    # Field values are generated in the same order the AIP fields are listed.
    fields = {
        "uuid": fake.uuid4(),
        "transfer_name": fake.text(20)[:-1],
        "create_date": date.today(),
        "mets_sha256": fake.sha256(),
        "size": randint(10000, 100_000_000),
        "storage_service_id": storage_service_id,
        "storage_location_id": storage_location_id,
        "fetch_job_id": fetch_job_id,
        "origin_pipeline_id": pipeline_id,
    }
    new_aip = AIP(**fields)

    session = db.session
    session.add(new_aip)
    session.commit()

    return new_aip
|
||
|
||
def create_fake_aip_files(min, max, aip_id):
    """Create and commit a random number of fake File records for an AIP.

    The number of files is drawn uniformly from ``min`` to ``max`` inclusive
    and exactly that many files are created. Previously one fewer than the
    drawn number was created, so the count could fall below ``min``.

    ``min`` and ``max`` shadow the builtins of the same name; the parameter
    names are kept so existing callers continue to work.
    """
    file_count = randint(min, max)

    for _ in range(file_count):
        aipfile = File(
            aip_id=aip_id,
            name=fake.text(20)[:-1],
            filepath=fake.file_path(),
            uuid=fake.uuid4(),
            file_type="original",
            size=randint(1000, 1_000_000),
            date_created=date.today(),
            puid=fake.text(20)[:-1],
            file_format=fake.text(20)[:-1],
            format_version=fake.text(20)[:-1],
            checksum_type=fake.text(20)[:-1],
            checksum_value=fake.text(20)[:-1],
            premis_object="",
        )

        db.session.add(aipfile)
        db.session.commit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import datetime | ||
|
||
import pytest | ||
|
||
from .tools.helpers import data | ||
|
||
|
||
@pytest.fixture
def mock_db_add(mocker):
    """Patch the session's add and commit so the helpers under test use mocks."""
    for target in ("AIPscan.db.session.add", "AIPscan.db.session.commit"):
        mocker.patch(target)
|
||
|
||
def test_create_fake_storage_service(mock_db_add):
    """Fake storage services have non-empty string fields and honour ``default``."""
    default_service = data.create_fake_storage_service(True)

    # Every generated text field must be a non-empty str.
    for field in ("name", "url", "user_name", "api_key"):
        value = getattr(default_service, field)
        assert value
        assert type(value) is str

    assert default_service.default
    assert type(default_service.default) is bool

    other_service = data.create_fake_storage_service(False)
    assert not other_service.default
|
||
|
||
def test_create_fake_fetch_job(mock_db_add):
    """Fake fetch jobs carry dates, a directory and the storage service ID."""
    storage_service = data.create_fake_storage_service(True)
    # The session is mocked, so no ID is assigned; set one by hand.
    storage_service.id = 1

    job = data.create_fake_fetch_job(storage_service.id)

    for field in ("download_start", "download_end"):
        value = getattr(job, field)
        assert value
        assert type(value) is datetime.date

    assert job.download_directory
    assert type(job.download_directory) is str

    assert job.storage_service_id == storage_service.id
|
||
|
||
def test_create_fake_location(mock_db_add):
    """Fake locations have a non-empty path and description and keep the service ID."""
    storage_service_id = 1
    fake_location = data.create_fake_location(storage_service_id)

    for field in ("current_location", "description"):
        value = getattr(fake_location, field)
        assert value
        assert type(value) is str

    assert fake_location.storage_service_id == 1
|
||
|
||
def test_create_fake_aip(mock_db_add):
    """Fake AIPs have populated, correctly typed fields and keep the IDs passed in."""
    aip = data.create_fake_aip(1, 2, 3, 4)

    # Generated string fields must be non-empty strs.
    for field in ("uuid", "transfer_name", "mets_sha256"):
        value = getattr(aip, field)
        assert value
        assert type(value) is str

    assert aip.create_date
    assert type(aip.create_date) is datetime.date

    assert aip.size
    assert type(aip.size) is int

    # Arguments are (pipeline, storage service, storage location, fetch job).
    assert aip.origin_pipeline_id == 1
    assert aip.storage_service_id == 2
    assert aip.storage_location_id == 3
    assert aip.fetch_job_id == 4