Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create proper enums for job statuses #412

Merged
merged 27 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
41313cd
Job status added
michalkrzem Oct 1, 2024
f4bae19
Job status with alembic migrations
michalkrzem Oct 1, 2024
d028b53
In INSTALL.md and README.md alembic command added.
michalkrzem Oct 1, 2024
1498a44
ignore enum type error
michalkrzem Oct 1, 2024
98621c7
Creating jobstatus enum by job.py site
michalkrzem Oct 1, 2024
e0ce4f2
Creating jobstatus enum in migration file
michalkrzem Oct 2, 2024
5525bdb
After review
michalkrzem Oct 8, 2024
db4e0e9
lint
michalkrzem Oct 8, 2024
3b4c309
Merge branch 'master' into 370-create-proper-enums-for-job-statuses
michalkrzem Oct 8, 2024
02ae102
Added recognized enum changes and removing removed status
michalkrzem Oct 8, 2024
690051d
with nmp errors
michalkrzem Oct 9, 2024
ef9e9d5
delete removed
michalkrzem Oct 9, 2024
8556a75
alembic_postgresql_enum for automatic enum type recognize
michalkrzem Oct 9, 2024
71f42ef
lint
michalkrzem Oct 9, 2024
5d78c6b
Create jobstatus enum type in migration file.
michalkrzem Oct 9, 2024
9b83297
black
michalkrzem Oct 9, 2024
e909efe
After revie - job status as a enum, match and jobagent with ON DELETE…
michalkrzem Oct 17, 2024
b3ddd37
Merge branch 'master' into 370-create-proper-enums-for-job-statuses
michalkrzem Oct 17, 2024
855380b
'version' is obsolete
michalkrzem Oct 17, 2024
34a5d2d
'version' is obsolete
michalkrzem Oct 17, 2024
930b064
fix migrations
michalkrzem Oct 17, 2024
4bb1b89
.
michalkrzem Oct 17, 2024
beabd17
.
michalkrzem Oct 17, 2024
a63cdf1
fix import in add_jobstatus migration
Oct 17, 2024
7835dce
black
Oct 17, 2024
3db4bfa
After review - migration modified
michalkrzem Oct 21, 2024
d9da4c5
black
michalkrzem Oct 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ uvicorn==0.30.6
wrapt==1.16.0
yara-python==4.5.1
yaramod==3.23.0
alembic_postgresql_enum
30 changes: 15 additions & 15 deletions src/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
and_,
update,
col,
delete,
)

from .models.agentgroup import AgentGroup
from .models.configentry import ConfigEntry
from .models.job import Job
from .models.job import Job, JobStatus
from .models.jobagent import JobAgent
from .models.match import Match
from .schema import MatchesSchema, ConfigSchema
Expand Down Expand Up @@ -67,7 +68,11 @@ def cancel_job(self, job: JobId, error=None) -> None:
session.execute(
update(Job)
.where(Job.id == job)
.values(status="cancelled", finished=int(time()), error=error)
.values(
status=JobStatus.cancelled,
finished=int(time()),
error=error,
)
)
session.commit()

Expand All @@ -85,23 +90,18 @@ def get_job(self, job: JobId) -> Job:
return self.__get_job(session, job)

def get_valid_jobs(self, username_filter: Optional[str]) -> List[Job]:
"""Retrieves valid (accessible and not removed) jobs from the database."""
"""Retrieves valid (accessible) jobs from the database."""
with self.session() as session:
query = (
select(Job)
.where(Job.status != "removed")
.order_by(col(Job.submitted).desc())
)
query = select(Job).order_by(col(Job.submitted).desc())
if username_filter:
query = query.where(Job.rule_author == username_filter)
return session.exec(query).all()

def remove_query(self, job: JobId) -> None:
"""Sets the job status to removed."""
"""Delete the job, linked match and job agent from the database."""
with self.session() as session:
session.execute(
update(Job).where(Job.id == job).values(status="removed")
)
delete_query = delete(Job).where(Job.id == job)
session.execute(delete_query)
session.commit()

def add_match(self, job: JobId, match: Match) -> None:
Expand Down Expand Up @@ -149,7 +149,7 @@ def agent_finish_job(self, job: Job) -> None:
session.execute(
update(Job)
.where(Job.internal_id == job.internal_id)
.values(finished=int(time()), status="done")
.values(finished=int(time()), status=JobStatus.done)
)
session.commit()

Expand Down Expand Up @@ -220,7 +220,7 @@ def init_job_datasets(self, job: JobId, num_datasets: int) -> None:
.values(
total_datasets=num_datasets,
datasets_left=num_datasets,
status="processing",
status=JobStatus.processing,
)
)
session.commit()
Expand Down Expand Up @@ -253,7 +253,7 @@ def create_search_task(
with self.session() as session:
obj = Job(
id=job,
status="new",
status=JobStatus.new,
rule_name=rule_name,
rule_author=rule_author,
raw_yara=raw_yara,
Expand Down
43 changes: 43 additions & 0 deletions src/migrations/versions/6b495d5a4855_add_jobstatus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""add jobstatus
Revision ID: 6b495d5a4855
Revises: dbb81bd4d47f
Create Date: 2024-10-15 08:17:30.036531
"""
from alembic import op
import sqlalchemy as sa

from mquery.models.job import JobStatus # type: ignore # noqa

# revision identifiers, used by Alembic.
revision = "6b495d5a4855"
down_revision = "dbb81bd4d47f"
branch_labels = None
depends_on = None

job_status = sa.Enum(JobStatus, name="jobstatus")


def upgrade() -> None:
op.execute("DELETE FROM job WHERE status = 'removed';")

job_status.create(op.get_bind())
op.alter_column(
"job",
"status",
existing_type=sa.VARCHAR(),
type_=job_status,
postgresql_using="status::jobstatus",
nullable=True,
)


def downgrade() -> None:
op.alter_column(
"job",
"status",
existing_type=job_status,
type_=sa.VARCHAR(),
nullable=False,
)

op.execute("DROP TYPE IF EXISTS jobstatus")
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""on delete cascade for jobagent and match
Revision ID: f654d7e4fcc7
Revises: 6b495d5a4855
Create Date: 2024-10-17 07:16:34.262079
"""
from alembic import op


# revision identifiers, used by Alembic.
revision = "f654d7e4fcc7"
down_revision = "6b495d5a4855"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The order of migrations is wrong. This will run after the attempted jobs removal.

I've tested it:

mquery-dev-web-1       | WARNING:  StatReload detected changes in 'tasks.py'. Reloading...
mquery-dev-web-1       | INFO:     Started server process [24]
mquery-dev-web-1       | INFO:     Waiting for application startup.
mquery-dev-web-1       | INFO  [alembic.runtime.migration] Context impl PostgresqlImpl.
mquery-dev-web-1       | INFO  [alembic.runtime.migration] Will assume transactional DDL.
mquery-dev-web-1       | INFO  [alembic.runtime.migration] Running upgrade dbb81bd4d47f -> 6b495d5a4855, add jobstatus
mquery-dev-web-1       | Revision ID: 6b495d5a4855
mquery-dev-web-1       | Revises: dbb81bd4d47f
mquery-dev-web-1       | Create Date: 2024-10-15 08:17:30.036531
mquery-postgres-1      | 2024-10-18 14:31:24.310 UTC [51] ERROR:  update or delete on table "job" violates foreign key constraint "match_job_id_fkey" on table "match"
mquery-postgres-1      | 2024-10-18 14:31:24.310 UTC [51] DETAIL:  Key (internal_id)=(34) is still referenced from table "match".
mquery-postgres-1      | 2024-10-18 14:31:24.310 UTC [51] STATEMENT:  DELETE FROM job WHERE status = 'removed';

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both migrations can be probably merged into one to fix this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, yes, I forgot about this :/ So, one big migration file with create_foreign_key with ondelete="CASCADE", delete "removed" status, create jobstatus and alter table with new type?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, sounds good. A benefit of one big migration is that if something goes wrong then a rollback is performed (so the db is not stuck in an invalid state).

branch_labels = None
depends_on = None


def upgrade() -> None:
op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey")
op.create_foreign_key(
constraint_name="jobagent_job_id_fkey",
source_table="jobagent",
referent_table="job",
local_cols=["job_id"],
remote_cols=["internal_id"],
ondelete="CASCADE",
)

op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey")
op.create_foreign_key(
constraint_name="match_job_id_fkey",
source_table="match",
referent_table="job",
local_cols=["job_id"],
remote_cols=["internal_id"],
ondelete="CASCADE",
)


def downgrade() -> None:
op.drop_constraint("jobagent_job_id_fkey", "jobagent", type_="foreignkey")
op.create_foreign_key(
constraint_name="jobagent_job_id_fkey",
source_table="jobagent",
referent_table="job",
local_cols=["job_id"],
remote_cols=["internal_id"],
)

op.drop_constraint("match_job_id_fkey", "match", type_="foreignkey")
op.create_foreign_key(
constraint_name="match_job_id_fkey",
source_table="match",
referent_table="job",
local_cols=["job_id"],
remote_cols=["internal_id"],
)
18 changes: 17 additions & 1 deletion src/models/job.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import enum

from sqlalchemy.dialects import postgresql

from sqlmodel import SQLModel, Field, ARRAY, String, Column, Relationship
from typing import Optional, List, Union, TYPE_CHECKING

Expand All @@ -6,11 +10,20 @@
from ..models.jobagent import JobAgent


class JobStatus(enum.Enum):
done = "done"
new = "new"
cancelled = "cancelled"
processing = "processing"


class JobView(SQLModel):
"""Public fields of mquery jobs."""

__table_args__ = {"extend_existing": True}

id: str
status: str
status: JobStatus = Field(sa_column=Column(postgresql.ENUM(JobStatus, name="jobstatus"))) # type: ignore
error: Optional[str]
rule_name: str
rule_author: str
Expand All @@ -29,6 +42,9 @@ class JobView(SQLModel):
total_datasets: int
agents_left: int

class Config:
arbitrary_types_allowed = True


class Job(JobView, table=True):
"""Job object in the database. Internal ID is an implementation detail."""
Expand Down
5 changes: 4 additions & 1 deletion src/models/jobagent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sqlalchemy import Column, ForeignKey
from sqlmodel import SQLModel, Field, Relationship
from typing import Union, TYPE_CHECKING

Expand All @@ -12,7 +13,9 @@ class JobAgent(SQLModel, table=True):
id: Union[int, None] = Field(default=None, primary_key=True)
task_in_progress: int

job_id: int = Field(foreign_key="job.internal_id")
job_id: int = Field(
sa_column=Column(ForeignKey("job.internal_id", ondelete="CASCADE"))
)
job: "Job" = Relationship(back_populates="agents")

agent_id: int = Field(foreign_key="agentgroup.id")
Expand Down
5 changes: 4 additions & 1 deletion src/models/match.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sqlalchemy import ForeignKey
from sqlmodel import SQLModel, Field, ARRAY, String, Column, JSON, Relationship
from typing import List, Union, Dict, Any

Expand All @@ -15,5 +16,7 @@ class Match(SQLModel, table=True):
# A list of yara rules matched to this file
matches: List[str] = Field(sa_column=Column(ARRAY(String)))

job_id: int = Field(foreign_key="job.internal_id")
job_id: int = Field(
sa_column=Column(ForeignKey("job.internal_id", ondelete="CASCADE"))
)
job: Job = Relationship(back_populates="matches")
4 changes: 2 additions & 2 deletions src/mqueryfront/src/utils.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
export const isStatusFinished = (status) =>
["done", "cancelled", "removed"].includes(status);
["done", "cancelled"].includes(status);

const statusClassMap = {
done: "success",
new: "info",
processing: "info",
cancelled: "danger",
removed: "dark",
};

export const isAuthEnabled = (config) =>
Expand All @@ -16,6 +15,7 @@ export const openidLoginUrl = (config) => {
if (config["openid_url"] === null || config["openid_client_id"] === null) {
// Defensive programming - config keys can be null.
return "#";
gi;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still typo

}
const login_url = new URL(config["openid_url"] + "/auth");
login_url.searchParams.append("client_id", config["openid_client_id"]);
Expand Down
8 changes: 4 additions & 4 deletions src/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .util import make_sha256_tag
from .config import app_config
from .plugins import PluginManager
from .models.job import Job
from .models.job import Job, JobStatus
from .models.match import Match
from .lib.yaraparse import parse_yara, combine_rules
from .lib.ursadb import Json, UrsaDb
Expand Down Expand Up @@ -182,7 +182,7 @@ def start_search(job_id: JobId) -> None:
"""
with job_context(job_id) as agent:
job = agent.db.get_job(job_id)
if job.status == "cancelled":
if job.status == JobStatus.cancelled:
logging.info("Job was cancelled, returning...")
return

Expand Down Expand Up @@ -232,7 +232,7 @@ def query_ursadb(job_id: JobId, dataset_id: str, ursadb_query: str) -> None:
"""Queries ursadb and creates yara scans tasks with file batches."""
with job_context(job_id) as agent:
job = agent.db.get_job(job_id)
if job.status == "cancelled":
if job.status == JobStatus.cancelled:
logging.info("Job was cancelled, returning...")
return

Expand Down Expand Up @@ -271,7 +271,7 @@ def run_yara_batch(job_id: JobId, iterator: str, batch_size: int) -> None:
"""Actually scans files, and updates a database with the results."""
with job_context(job_id) as agent:
job = agent.db.get_job(job_id)
if job.status == "cancelled":
if job.status == JobStatus.cancelled:
logging.info("Job was cancelled, returning...")
return

Expand Down
Loading