Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mweidling committed Mar 4, 2024
0 parents commit f5ab3ef
Show file tree
Hide file tree
Showing 11 changed files with 730 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
venv
34 changes: 34 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings
# Container Scanning customization: https://docs.gitlab.com/ee/user/application_security/container_scanning/#customizing-the-container-scanning-settings
# Note that environment variables can be set in several places
# See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence
# Pipeline stages, in execution order.
stages:
- test
- build

# Static application security testing, using GitLab's bundled SAST template.
sast:
stage: test
include:
- template: Security/SAST.gitlab-ci.yml

# DEST is the registry path the image is pushed to; IMAGE_NAME tags it with
# the branch name and the short commit SHA.
variables:
DEST: "docker.gitlab.gwdg.de/subugoe/ocr-d/quiver-mongoapi-local"
IMAGE_NAME: "${DEST}:${CI_COMMIT_BRANCH}-${CI_COMMIT_SHORT_SHA}"

# Builds the Dockerfile with kaniko and pushes the result to IMAGE_NAME.
# Registry credentials are written to /kaniko/.docker/config.json from the
# CI_REGISTRY* variables before the executor runs.
# NOTE(review): the leading dot presumably makes this a hidden job template in
# GitLab CI; no job extending it is visible in this file -- confirm it is used.
.build_image:
stage: build
image:
name: gcr.io/kaniko-project/executor:debug
entrypoint:
- ''
except:
- triggers
script:
- echo "Pushing image to ${DEST}"
- mkdir -p /kaniko/.docker
- echo "{\"auths\":{\"${CI_REGISTRY}\":{\"auth\":\"$(printf "%s:%s" "${CI_REGISTRY_USER}" "${CI_REGISTRY_PASSWORD}" | base64 | tr -d '\n')\"}}}" > /kaniko/.docker/config.json
- /kaniko/executor
--context "${CI_PROJECT_DIR}"
--dockerfile "${CI_PROJECT_DIR}/Dockerfile"
--build-arg build_date=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
--destination ${IMAGE_NAME}
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Image for the QuiVer MongoDB API server (FastAPI app served by uvicorn).
FROM python:3.10-alpine

WORKDIR /code

# Requirements are copied and installed before the sources are added.
COPY requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# The contents of ./src land directly in /code, so api.py is importable as "api".
COPY ./src /code

# Serve the FastAPI instance `app` from api.py on all interfaces, port 8084.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8084"]
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
uvicorn
fastapi
pymongo[srv]
pydantic>=2.0.0
190 changes: 190 additions & 0 deletions src/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""
API server for querying the local MongoDB and posting
data to it.
To be used by the front end.
"""

import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pymongo import MongoClient

from model import Model, GTModel, WorkflowModel

import gt
import workflows
import releases
import runs

# CORS: requests from every origin, with every method and header, are allowed.
origins = [
    '*',
]

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_methods=['*'],
    allow_headers=['*'],
)

# MongoDB connection settings are taken from the environment.
# NOTE(review): HOSTNAME and USERNAME are names that container runtimes and
# shells commonly set themselves -- confirm the deployment overrides them.
HOSTNAME = os.getenv('HOSTNAME')
USERNAME = os.getenv('USERNAME')
PASSWORD = os.getenv('PASSWORD')

# Credentials are handed over as keyword arguments instead of being
# interpolated into a connection URI: inside a URI, a user name or password
# containing characters such as '@', ':', '/' or '%' would have to be
# percent-escaped, otherwise the URI is rejected or parsed incorrectly.
CLIENT = MongoClient(
    host=HOSTNAME,
    port=27017,
    username=USERNAME,
    password=PASSWORD,
    authSource='results',
)
# All endpoints operate on the "quiver" collection of the "results" database.
DB = CLIENT.results
COLL = DB.quiver


@app.get('/api/gt')
def api_get_all_gt() -> list:
    """
    List all Ground Truth datasets curated by OCR-D for QuiVer.

    Delegates to ``gt.get_all_gt`` with the module-level collection.
    """
    all_gt = gt.get_all_gt(COLL)
    return all_gt


@app.get('/api/gt/{gt_id}')
def api_get_gt(gt_id: str) -> list:
    """
    Return the Ground Truth information selected by an ID.

    Args:
        - gt_id (str): Ground Truth ID taken from the URL path; how it is
          matched against stored IDs is decided by ``gt.get_gt``
    Return
        - list: the result of ``gt.get_gt`` for the module-level collection
    """
    matching_gt = gt.get_gt(COLL, gt_id)
    return matching_gt


@app.post('/api/gt')
def api_post_new_gt(gt_model: GTModel) -> str:
    """
    Store a new Ground Truth dataset description in the database.

    Args:
        - gt_model (GTModel): information about the GT, parsed from the
          request body
    Return
        - str: whatever ``gt.post_new_gt`` reports for the insert
    """
    outcome = gt.post_new_gt(COLL, gt_model)
    return outcome


@app.get('/api/workflows')
def get_all_workflows() -> list:
    """
    List all workflows curated by OCR-D for QuiVer.

    Delegates to ``workflows.get_all_workflows`` with the module-level
    collection.
    """
    all_workflows = workflows.get_all_workflows(COLL)
    return all_workflows


@app.get('/api/workflows/{wf_id}')
def get_workflows(wf_id: str) -> list:
    """
    Return the workflow information selected by an ID.

    Args:
        - wf_id (str): workflow ID taken from the URL path
    Return
        - list: the result of ``workflows.get_workflows``
    """
    matching_workflows = workflows.get_workflows(COLL, wf_id)
    return matching_workflows


@app.post('/api/workflows')
def post_new_workflow(workflow: WorkflowModel) -> str:
    """
    Store a new workflow description in the database.

    Args:
        - workflow (WorkflowModel): information about the workflow, parsed
          from the request body
    Return
        - str: whatever ``workflows.post_new_workflow`` reports for the
          insert
    """
    outcome = workflows.post_new_workflow(COLL, workflow)
    return outcome

@app.get('/api/runs')
def get_all_runs() -> list:
    """
    Return all evaluation results for all Quiver workspaces.

    Delegates to ``runs.get_all_runs`` with the module-level collection.
    """
    all_runs = runs.get_all_runs(COLL)
    return all_runs


@app.get('/api/runs/latest')
def get_all_latest_runs() -> list:
    """
    Return the latest evaluation results across Quiver workspaces.

    Delegates to ``runs.get_all_latest_runs``; what counts as "latest" is
    decided there.
    """
    latest_runs = runs.get_all_latest_runs(COLL)
    return latest_runs


@app.get('/api/runs/{gt_id}')
def get_all_runs_by_gt(gt_id: str,
                       start_date: str | None = None,
                       end_date: str | None = None) -> list:
    """
    Return evaluation results for all Quiver workspaces with a given GT.

    Args:
        - gt_id (str): The ID of the GT data used for a run
        - start_date (str | None): optional query parameter, forwarded
          unchanged to ``runs.get_all_runs_by_gt``
        - end_date (str | None): optional query parameter, forwarded
          unchanged to ``runs.get_all_runs_by_gt``
    """
    runs_for_gt = runs.get_all_runs_by_gt(COLL, gt_id, start_date, end_date)
    return runs_for_gt


@app.get('/api/runs/{gt_id}/latest')
def get_latest_runs_per_gt(gt_id: str) -> list:
    """
    Return evaluation results for the latest Quiver workspace with a
    given GT.

    Args:
        - gt_id (str): The ID of the GT data used for a run
    """
    latest_for_gt = runs.get_latest_runs_per_gt(COLL, gt_id)
    return latest_for_gt


@app.get('/api/runs/{gt_id}/{workflow_id}')
def get_all_runs_by_gt_and_wf(workflow_id: str,
                              gt_id: str,
                              start_date: str | None = None,
                              end_date: str | None = None) -> list:
    """
    Return evaluation results for all Quiver workspaces with a given
    workflow and GT.

    Args:
        - workflow_id (str): The ID of the workflow used for a run
        - gt_id (str): The ID of the GT data used for a run
        - start_date (str | None): optional query parameter, forwarded
          unchanged to ``runs.get_all_runs_by_gt_and_wf``
        - end_date (str | None): optional query parameter, forwarded
          unchanged to ``runs.get_all_runs_by_gt_and_wf``
    """
    matching_runs = runs.get_all_runs_by_gt_and_wf(
        COLL, workflow_id, gt_id, start_date, end_date
    )
    return matching_runs


@app.get('/api/runs/{gt_id}/{workflow_id}/latest')
def get_latest_runs(workflow_id: str,
                    gt_id: str) -> list:
    """
    Return evaluation results for the latest Quiver workspace with a
    given workflow and GT.

    Args:
        - workflow_id (str): The ID of the workflow used for a run
        - gt_id (str): The ID of the GT data used for a run
    """
    latest = runs.get_latest_runs(COLL, workflow_id, gt_id)
    return latest


@app.post("/api/runs")
def post_new_result(data: Model):
    """
    Store a new evaluation workspace in the database.

    Args:
        - data (Model): information about the evaluation workspace, parsed
          from the request body
    Return
        - whatever ``runs.post_new_result`` reports for the insert
    """
    outcome = runs.post_new_result(COLL, data)
    return outcome

@app.get("/api/releases")
def get_releases():
    """
    Return all releases for which Quiver provides data.

    Delegates to ``releases.get_all_releases`` with the module-level
    collection.
    """
    all_releases = releases.get_all_releases(COLL)
    return all_releases
17 changes: 17 additions & 0 deletions src/commons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Functions used by several modules.
"""

def remove_mongodb_id_from_result(json_data, result_type) -> list:
    """
    Strip the MongoDB ``_id`` from every document of a query result.

    Each document is shallow-copied first, so the dictionaries in
    ``json_data`` are left untouched. Documents that carry no ``_id`` (for
    example because it was already removed) are accepted as they are.

    Args:
        - json_data: iterable of documents (mappings) retrieved from MongoDB
        - result_type: kind of result; for ``'gt'`` each document is reduced
          to the value of its ``'gt_workspace'`` key
    Return
        - list: the cleaned documents, in input order
    Raises
        - KeyError: if ``result_type`` is ``'gt'`` and a document has no
          ``'gt_workspace'`` key
    """
    purged_list = []
    for obj in json_data:
        d = dict(obj)
        # pop() with a default tolerates documents without an '_id' key.
        d.pop('_id', None)
        if result_type == 'gt':
            # GT results are wrapped in a 'gt_workspace' envelope; callers
            # only get the inner object.
            d = d['gt_workspace']
        purged_list.append(d)
    return purged_list
45 changes: 45 additions & 0 deletions src/gt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
This module handles all operations that deal with Ground Truth Data.
"""

import json
from bson import json_util
from re import compile
from pymongo import collection
from model import GTModel
from commons import remove_mongodb_id_from_result

def get_all_gt(coll: collection.Collection) -> list:
    """
    Fetch all Ground Truth entries curated by OCR-D for QuiVer.

    Selects every document that has a ``gt_workspace`` field, round-trips
    the result through ``bson.json_util`` to get plain JSON data and hands
    it to ``remove_mongodb_id_from_result`` with result type ``'gt'``.
    """
    gt_filter = {'gt_workspace': {'$exists': True}}
    serialized = json_util.dumps(coll.find(gt_filter))
    documents = json.loads(serialized)

    return remove_mongodb_id_from_result(documents, 'gt')


def get_gt(coll: collection.Collection, gt_id: str) -> list:
    """
    Returns information about one available Ground Truth
    curated by OCR-D for QuiVer

    Documents are selected when their ``gt_workspace.id`` contains ``gt_id``
    as a literal substring.

    Args:
        - coll (Collection): the MongoDB collection to query
        - gt_id (str): the (partial) ID of the Ground Truth to look up
    Return
        - list: the result of ``remove_mongodb_id_from_result`` for the
          matching documents with result type ``'gt'``
    """
    # Imported locally because the module only imports ``compile`` from ``re``.
    from re import escape

    # gt_id is caller-supplied text, not a pattern: escaping it keeps regex
    # metacharacters from changing the query and prevents malformed or
    # pathological patterns from raising or stalling the lookup.
    gt_regex = compile(escape(gt_id))
    cursor = coll.find({'gt_workspace.id': gt_regex})
    json_data = json.loads(json_util.dumps(cursor))

    return remove_mongodb_id_from_result(json_data, 'gt')


def post_new_gt(coll: collection.Collection, gt: GTModel) -> str:
    """
    Insert a new Ground Truth dataset description into the collection.

    Args:
        - coll (Collection): the MongoDB collection to insert into
        - gt (GTModel): information about the GT; stored as the result of
          its ``model_dump()``
    Return
        - str: string form of the insert result's ``acknowledged`` flag
    """
    document = gt.model_dump()
    insert_result = coll.insert_one(document)
    return str(insert_result.acknowledged)
Loading

0 comments on commit f5ab3ef

Please sign in to comment.