Skip to content

Commit

Permalink
Replace installation_method with is_docker, is_conda, is_singularity, is_ci
Browse files Browse the repository at this point in the history
  • Loading branch information
vladsavelyev committed Feb 21, 2024
1 parent 7412dfe commit 8857483
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 27 deletions.
16 changes: 10 additions & 6 deletions app/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class VisitStats(SQLModel, table=True):
"""
Table to record per-interval visit summaries.
All keys describing the platform are primary, so we have a separate usage record
All keys describing the platform are primary, so we have a separate usage record
coming from each source.
"""

Expand All @@ -34,8 +34,10 @@ class VisitStats(SQLModel, table=True):
version_multiqc: str = Field(primary_key=True)
version_python: str = Field(primary_key=True)
operating_system: str = Field(primary_key=True)
installation_method: str = Field(primary_key=True)
ci_environment: bool = Field(primary_key=True)
is_docker: bool = Field(primary_key=True)
is_singularity: bool = Field(primary_key=True)
is_conda: bool = Field(primary_key=True)
is_ci: bool = Field(primary_key=True)
count: int


Expand Down Expand Up @@ -115,8 +117,10 @@ def insert_usage_stats(visit_stats: pd.DataFrame):
and VisitStats.version_multiqc == row["version_multiqc"]
and VisitStats.version_python == row["version_python"]
and VisitStats.operating_system == row["operating_system"]
and VisitStats.installation_method == row["installation_method"]
and VisitStats.ci_environment == row["ci_environment"]
and VisitStats.is_docker == row["is_docker"]
and VisitStats.is_singularity == row["is_singularity"]
and VisitStats.is_conda == row["is_conda"]
and VisitStats.is_ci == row["is_ci"]
)
).first()
if existing_entry:
Expand All @@ -129,7 +133,7 @@ def insert_usage_stats(visit_stats: pd.DataFrame):

def insert_download_stats(df: pd.DataFrame) -> pd.DataFrame:
# df has "date" as its index. Re-add it as a separate column of datetime type
df["date"] = pd.to_datetime(df.index)
df["date"] = pd.to_datetime(df.index)
df = df[["date"] + [c for c in df.columns if c != "date"]] # place date first
with Session(engine) as session:
for index, row in df.iterrows():
Expand Down
51 changes: 34 additions & 17 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from asyncio import sleep

from typing import List, Dict

import sys
Expand All @@ -23,10 +25,9 @@
from fastapi_utilities import repeat_every
from github import Github
from plotly.graph_objs import Layout
from sqlalchemy.exc import IntegrityError, ProgrammingError
from sqlalchemy.exc import ProgrammingError

from app import __version__, db, models
from app.db import engine
from app.downloads import daily

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -83,8 +84,10 @@ def update_version():
"version_multiqc",
"version_python",
"operating_system",
"installation_method",
"ci_environment",
"is_docker",
"is_singularity",
"is_conda",
"is_ci",
]

# Thread-safe in-memory buffer to accumulate recent visits before writing to the CSV file
Expand All @@ -98,20 +101,25 @@ async def version(
version_multiqc: str = "",
version_python: str = "",
operating_system: str = "",
installation_method: str = "",
ci_environment: str = "",
is_docker: str = "",
is_singularity: str = "",
is_conda: str = "",
is_ci: str = "",
):
"""
Endpoint for MultiQC that returns the latest release, and logs
the visit along with basic user environment detail.
"""
await sleep(60)
background_tasks.add_task(
_log_visit,
version_multiqc=version_multiqc,
version_python=version_python,
operating_system=operating_system,
installation_method=installation_method,
ci_environment=ci_environment,
is_docker=is_docker,
is_singularity=is_singularity,
is_conda=is_conda,
is_ci=is_ci,
)
return models.VersionResponse(latest_release=app.latest_release)

Expand All @@ -120,8 +128,10 @@ def _log_visit(
version_multiqc: str = "",
version_python: str = "",
operating_system: str = "",
installation_method: str = "",
ci_environment: str = "",
is_docker: str = "",
is_singularity: str = "",
is_conda: str = "",
is_ci: str = "",
):
global visit_buffer
with visit_buffer_lock:
Expand All @@ -131,11 +141,13 @@ def _log_visit(
"version_multiqc": version_multiqc,
"version_python": version_python,
"operating_system": operating_system,
"installation_method": installation_method,
"ci_environment": ci_environment,
"is_docker": is_docker,
"is_singularity": is_singularity,
"is_conda": is_conda,
"is_ci": is_ci,
}
)
logger.info(f"Logging visit, total visits: {len(visit_buffer)}")
logger.info(f"Logging visit, total visits: {len(visit_buffer)}")


# Path to a buffer CSV file to persist recent visits before dumping to the database
Expand Down Expand Up @@ -200,20 +212,25 @@ def _summarize_visits(interval="5min") -> Response:
df["end"] = df["start"] + pd.to_timedelta(interval)
df["start"] = df["start"].dt.strftime("%Y-%m-%d %H:%M")
df["end"] = df["end"].dt.strftime("%Y-%m-%d %H:%M")
df["ci_environment"] = df["ci_environment"].apply(lambda val: strtobool(val) if val else False)
df["is_docker"] = df["is_docker"].apply(lambda val: strtobool(val) if val else False)
df["is_singularity"] = df["is_singularity"].apply(lambda val: strtobool(val) if val else False)
df["is_conda"] = df["is_conda"].apply(lambda val: strtobool(val) if val else False)
df["is_ci"] = df["is_ci"].apply(lambda val: strtobool(val) if val else False)
df = df.drop(columns=["timestamp"])

# Summarize visits per user per time interval
interval_summary = df.groupby(["start", "end"] + visit_fieldnames).size().reset_index(name="count")
if len(interval_summary) == 0:
return PlainTextResponse(content="No new visits to summarize")

logger.info(f"Summarizing {len(df)} visits in {CSV_FILE_PATH} and writing {len(interval_summary)} rows to the DB")
logger.info(
f"Summarizing {len(df)} visits in {CSV_FILE_PATH} and writing {len(interval_summary)} rows to the DB"
)
try:
db.insert_usage_stats(interval_summary)
except Exception as e:
return PlainTextResponse(
status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR,
status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR,
content=f"Failed to write to the database: {e}",
)
else:
Expand Down Expand Up @@ -291,7 +308,7 @@ async def summarize_visits_endpoint():
except Exception as e:
raise HTTPException(status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR, detail=str(e))

@app.post("/update_download_stats")
@app.post("/update_downloads")
async def update_downloads_endpoint(background_tasks: BackgroundTasks):
try:
background_tasks.add_task(_update_download_stats)
Expand Down
12 changes: 8 additions & 4 deletions app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ class UsageCategory(str, Enum):
version_python = "version_python"
version_python_simple = "version_python_simple"
operating_system = "operating_system"
installation_method = "installation_method"
ci_environment = "ci_environment"
is_docker = "is_docker"
is_singularity = "is_singularity"
is_conda = "is_conda"
is_ci = "is_ci"


usage_category_nicenames = dict(
Expand All @@ -83,6 +85,8 @@ class UsageCategory(str, Enum):
version_python="Python version",
version_python_simple="Python version (simple)",
operating_system="Operating system",
installation_method="Installation method",
ci_environment="CI environment",
is_docker="Docker",
is_singularity="Singularity",
is_conda="Conda",
is_ci="CI environment",
)

0 comments on commit 8857483

Please sign in to comment.