Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON-LD Metadata for JSON API #92

Merged
merged 14 commits into from
Feb 3, 2025
Merged
8 changes: 7 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
data
s3data
dev/docker/data
dev/docker/data
tests/mock_data
.conda
.venv
.pytest_cache
docs
src/api/static
39 changes: 31 additions & 8 deletions src/api/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,34 @@
from sqlmodel import SQLModel
from sqlalchemy import create_engine, MetaData, text
from .environment import DB_NAME, SQLALCHEMY_DEBUG, SQLALCHEMY_DATABASE_URL

# Do not remove. Sqlalchemy needs this import to create tables
from . import models # noqa: F401
import logging

from psycopg2.extras import Json

logging.basicConfig(level=logging.INFO)

LAST_MAST_SHOT = 30471 # This is the last MAST shot before MAST-U


class Context(str, Enum):
    """Vocabulary namespace IRIs used to build JSON-LD ``@context`` maps.

    Subclasses ``str`` so members serialize directly as their IRI value
    when dumped to JSON (e.g. via ``psycopg2.extras.Json``).
    """

    DCAT = "http://www.w3.org/ns/dcat#"
    DCT = "http://purl.org/dc/terms/"
    DCD = "http://purl.org/dc/dcmitype/"
    FOAF = "http://xmlns.com/foaf/0.1/"
    # JSON-LD context term definitions must expand to absolute IRIs; a bare
    # "schema.org" (no scheme) is not an IRI and conformant JSON-LD
    # processors would silently drop every "schema:"-prefixed term.
    SCHEMA = "https://schema.org/"
    DQV = "http://www.w3.org/ns/dqv#"
    SDMX = "http://purl.org/linked-data/sdmx/2009/measure#"

# Default JSON-LD "@context" mapping (prefix -> vocabulary IRI) attached to
# rows written by the table-creation routines. Built from an ordered tuple
# sequence so the serialized key order is explicit and stable.
base_context = {
    prefix: iri
    for prefix, iri in (
        ("dct", Context.DCT),
        ("schema", Context.SCHEMA),
        ("dqv", Context.DQV),
        ("sdmx-measure", Context.SDMX),
        ("dcat", Context.DCAT),
        ("dcd", Context.DCD),
    )
}

class URLType(Enum):
"""Enum type for different types of storage endpoint"""

Expand Down Expand Up @@ -110,9 +127,12 @@ def create_user(self):
def create_cpf_summary(self, data_path: Path):
    """Create the CPF summary table.

    Reads every ``cpf/*_cpf_columns.parquet`` file under ``data_path``,
    concatenates them, de-duplicates on column ``name``, attaches the
    JSON-LD context, and appends the rows to the ``cpf_summary`` table.

    Parameters
    ----------
    data_path : Path
        Root data directory containing the ``cpf/`` subdirectory.
    """
    # Sort the matches: glob order is filesystem-dependent, and
    # drop_duplicates keeps the first occurrence, so an unsorted list makes
    # which duplicate row survives non-deterministic between runs.
    paths = sorted(data_path.glob("cpf/*_cpf_columns.parquet"))
    if not paths:
        # pd.concat raises ValueError on an empty list; treat "no CPF
        # files found" as a logged no-op rather than a crash.
        logging.warning("No CPF column files found under %s", data_path)
        return
    df = pd.concat([pd.read_parquet(path) for path in paths]).reset_index(drop=True)
    df = df.drop_duplicates(subset=["name"])
    # Every row carries the same JSON-LD @context payload, wrapped for
    # psycopg2 so it lands in a JSON column.
    df["context"] = [Json(base_context)] * len(df)
    df.to_sql("cpf_summary", self.uri, if_exists="append")


def create_scenarios(self, data_path: Path):
"""Create the scenarios metadata table"""
Expand All @@ -123,6 +143,7 @@ def create_scenarios(self, data_path: Path):

data = pd.DataFrame(dict(id=ids, name=scenarios)).set_index("id")
data = data.dropna()
data["context"] = [Json(base_context)] * len(data)
data.to_sql("scenarios", self.uri, if_exists="append")

def create_shots(self, data_path: Path):
Expand All @@ -141,6 +162,7 @@ def create_shots(self, data_path: Path):
shot_metadata["scenario"] = shot_metadata["scenario_id"]
shot_metadata["facility"] = "MAST"
shot_metadata = shot_metadata.drop(["scenario_id", "reference_id"], axis=1)
shot_metadata["context"] = [Json(base_context)] * len(shot_metadata)
shot_metadata["uuid"] = shot_metadata.index.map(get_dataset_uuid)
shot_metadata["url"] = (
"s3://mast/level1/shots/" + shot_metadata.index.astype(str) + ".zarr"
Expand Down Expand Up @@ -187,7 +209,7 @@ def create_signals(self, data_path: Path):
df = signals_metadata
df = df[df.shot_id <= LAST_MAST_SHOT]
df = df.drop_duplicates(subset="uuid")

df["context"] = [Json(base_context)] * len(df)
df["shape"] = df["shape"].map(lambda x: x.tolist())
df["dimensions"] = df["dimensions"].map(lambda x: x.tolist())

Expand All @@ -209,13 +231,14 @@ def create_sources(self, data_path: Path):
source_metadata = pd.read_parquet(data_path / "sources.parquet")
source_metadata = source_metadata.drop_duplicates("uuid")
source_metadata = source_metadata.loc[source_metadata.shot_id <= LAST_MAST_SHOT]
source_metadata["context"] = [Json(base_context)] * len(source_metadata)
source_metadata["url"] = (
"s3://mast/level1/shots/"
+ source_metadata["shot_id"].map(str)
+ ".zarr/"
+ source_metadata["name"]
)
column_names = ["uuid", "shot_id", "name", "description", "quality", "url"]
column_names = ["uuid", "shot_id", "name", "description", "quality", "url", "context"]
source_metadata = source_metadata[column_names]
source_metadata.to_sql("sources", self.uri, if_exists="append", index=False)

Expand Down Expand Up @@ -243,7 +266,7 @@ def create_db_and_tables(data_path):

# populate the database tables
logging.info("Create CPF summary")
client.create_cpf_summary(data_path / "cpf")
client.create_cpf_summary(data_path)

logging.info("Create Scenarios")
client.create_scenarios(data_path)
Expand Down
2 changes: 1 addition & 1 deletion src/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def execute_query_all(db: Session, query: Query):

def execute_query_one(db: Session, query: Query):
    """Run ``query`` and return its single result row as a dict.

    ``.one()`` raises if the query yields zero or more than one row.
    ``None``-valued fields are omitted and field aliases are used as keys.
    """
    row = db.execute(query).one()
    record = row[0]
    return record.dict(exclude_none=True, by_alias=True)


Expand Down
Loading
Loading