Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transfer file2udf_quantit_qc to cg_lims #574

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
187d098
New Femtopulse csv file script
idalindegaard Dec 13, 2024
a86a2ca
clean up the code a bit
idalindegaard Dec 13, 2024
863fd3a
Merge branch 'master' into femtopulse_csv
idalindegaard Dec 13, 2024
ed5d0cf
updated file name
idalindegaard Dec 13, 2024
6a9d27f
Merge branch 'femtopulse_csv' of github.com:Clinical-Genomics/cg_lims…
idalindegaard Dec 13, 2024
64c2956
updated file name again
idalindegaard Dec 13, 2024
1a4a9c8
ran isort
idalindegaard Dec 13, 2024
1df87af
updated error handling
idalindegaard Dec 16, 2024
aa06f8e
updated error handling again
idalindegaard Dec 16, 2024
b3ac980
finishing touches
idalindegaard Dec 16, 2024
dc9ebcd
black and isort
Karl-Svard Dec 19, 2024
8b30f0b
more isort for some reason
Karl-Svard Dec 19, 2024
e2d8059
several changes after feedback
idalindegaard Jan 7, 2025
edb580a
index range fix
idalindegaard Jan 7, 2025
7fa34ce
sneak edits
Karl-Svard Jan 7, 2025
d93460e
renamed xml parser
Karl-Svard Jan 8, 2025
4cb6ce5
moved all parsers into new sub dir
Karl-Svard Jan 8, 2025
c07e94e
add new common func for creating well/artifact dicts
Karl-Svard Jan 8, 2025
bf2ff8a
add type hints and a bit of refactoring to file_to_udf
Karl-Svard Jan 8, 2025
75f60da
adding more code
Karl-Svard Jan 8, 2025
f9b4c70
some fixes and additions
Karl-Svard Jan 8, 2025
3286ec3
merge master and more
Karl-Svard Jan 8, 2025
89e1dd6
add more detailed error messages
Karl-Svard Jan 9, 2025
6b18bec
first set of changes to file2udf
Karl-Svard Jan 10, 2025
284390c
docstrings
Karl-Svard Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cg_lims/EPPs/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
from cg_lims.EPPs.files.barcode_tubes import make_barcode_csv
from cg_lims.EPPs.files.csv_for_kapa_truble_shooting.csv_for_kapa_debug import trouble_shoot_kapa
from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv
from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf
from cg_lims.EPPs.files.hamilton.base import hamilton
from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report
from cg_lims.EPPs.files.parsers.file_to_udf import csv_well_to_udf
from cg_lims.EPPs.files.parsers.illumina_xml_to_udf import parse_run_parameters
from cg_lims.EPPs.files.parsers.ont_json_to_udf import parse_ont_report
from cg_lims.EPPs.files.parsers.quantit_excel_to_udf import quantit_excel_to_udf
from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map
from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map
from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet
from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet
from cg_lims.EPPs.files.smrt_link.run_design import create_smrtlink_run_design
from cg_lims.EPPs.files.smrt_link.sample_setup import create_smrtlink_sample_setup
from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters


@click.group(invoke_without_command=True)
Expand All @@ -35,3 +36,4 @@ def files(ctx):
files.add_command(make_femtopulse_csv)
files.add_command(create_smrtlink_sample_setup)
files.add_command(create_smrtlink_run_design)
files.add_command(quantit_excel_to_udf)
100 changes: 0 additions & 100 deletions cg_lims/EPPs/files/file_to_udf.py

This file was deleted.

Empty file.
153 changes: 153 additions & 0 deletions cg_lims/EPPs/files/parsers/file_to_udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python

import csv
import logging
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import click
from cg_lims import options
from cg_lims.exceptions import ArgumentError, LimsError, MissingArtifactError, MissingFileError
from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name
from cg_lims.get.files import get_file_path
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def make_udf_dict(udfs: Tuple[str, ...], value_fields: Tuple[str, ...]) -> Dict[str, str]:
    """Create dictionary containing UDF names and their corresponding value field names.

    The two sequences are paired by position: the first UDF name maps to the first
    value field, and so on. If the same UDF name is given more than once, the last
    value field given for it wins.

    Raises:
        ArgumentError: if the two sequences differ in length.
    """
    if len(udfs) != len(value_fields):
        raise ArgumentError(
            "The number of artifact-udfs to update and file value fields must be the same."
        )
    # Lengths are equal at this point, so zip() cannot silently drop any entry.
    return dict(zip(udfs, value_fields))


def get_file_placeholder_paths(placeholder_names: List[str], process: Process) -> List[str]:
    """Resolve file placeholder names in the given process to file paths.

    Each name is looked up with `get_artifact_by_name` and the resulting artifact is
    handed to `get_file_path`. The returned paths are in the same order as the names.
    """
    return [
        get_file_path(file_artifact=get_artifact_by_name(process=process, name=name))
        for name in placeholder_names
    ]


def set_udfs_from_file(
    well_field: str, udf_vf_dict: Dict[str, str], well_dict: dict, result_file: Path
) -> List[str]:
    """Parse a CSV file and set the corresponding UDF values for each sample.

    For every UDF in `udf_vf_dict` whose value field is a column of `result_file`,
    the entry is popped from `udf_vf_dict` (the caller's dictionary is mutated) and
    the column value of each row is written to the artifact found in `well_dict`
    under the row's `well_field` value. UDFs whose value field is not a column of
    this file are left in the dictionary and skipped.

    Args:
        well_field: Name of the CSV column holding the well position.
        udf_vf_dict: Mapping of UDF name to CSV column name.
        well_dict: Mapping of well position to artifact.
        result_file: Path of the CSV file to read (decoded as latin1).

    Returns:
        Warning messages (each at most once) describing missing values and
        artifacts in `well_dict` that were never updated from this file.
    """
    missing_value_msg: str = "Some samples in the file had missing values."
    error_msg: List[str] = []
    updated_wells: set = set()

    # Read the whole file up front. A csv.DictReader can only be iterated once, so
    # looping over the reader once per UDF would leave every UDF after the first
    # one without any rows to read.
    with open(result_file, newline="", encoding="latin1") as csvfile:
        reader: csv.DictReader = csv.DictReader(csvfile)
        # fieldnames is None for a completely empty file.
        fieldnames: List[str] = list(reader.fieldnames or [])
        samples: List[dict] = list(reader)

    # Iterate over a copy of the keys since matched entries are popped below.
    for udf_name in list(udf_vf_dict):
        if udf_vf_dict[udf_name] not in fieldnames:
            LOG.info(
                f"Value {udf_vf_dict[udf_name]} does not exist in file {result_file}, skipping."
            )
            continue
        value_field: str = udf_vf_dict.pop(udf_name)

        for sample in samples:
            well: str = sample.get(well_field)
            if well not in well_dict:
                LOG.info(f"Well {well} was not found in the step. Skipping!")
                continue
            artifact: Artifact = well_dict[well]
            value: Any = sample.get(value_field)
            if value is None:
                if missing_value_msg not in error_msg:
                    error_msg.append(missing_value_msg)
                LOG.info(f"Missing value for sample {sample} in well {well}. Skipping!")
                continue
            try:
                artifact.udf[udf_name] = str(value)
            except Exception:
                # Presumably the UDF rejected a string value (e.g. a numeric UDF),
                # so retry with a float. A non-numeric value still raises here.
                artifact.udf[udf_name] = float(value)
            artifact.put()
            updated_wells.add(well)

    # Compare distinct wells rather than the number of writes, so duplicated rows
    # or several UDFs per sample cannot hide an artifact that was never updated.
    if len(updated_wells) < len(well_dict):
        error_msg.append("Some samples in the step were not represented in the file.")

    return error_msg


def set_udfs(
    well_fields: List[str],
    udf_vf_dict: Dict[str, str],
    well_dict: dict,
    file_placeholders: List[str],
    local_files: Optional[List[str]],
    process: Process,
) -> None:
    """Loop through each given file and parse out the given values which are then set to their corresponding UDFs.

    Files are taken from `local_files` when any are given, otherwise the file
    placeholders of the process are resolved to paths. Files and well fields are
    paired by position.

    Raises:
        ArgumentError: if the number of files differs from the number of well fields.
        MissingFileError: if one of the files does not exist.
        MissingArtifactError: if any file reported warnings; the message holds each
            distinct warning once, in the order it was first reported.
    """
    files: List[str] = local_files or get_file_placeholder_paths(
        placeholder_names=file_placeholders, process=process
    )
    if len(well_fields) != len(files):
        raise ArgumentError("The number of files to read and well fields must be the same.")

    error_messages: List[str] = []
    for file, well_field in zip(files, well_fields):
        result_file: Path = Path(file)
        if not result_file.is_file():
            raise MissingFileError(f"No such file: {file}")
        error_messages += set_udfs_from_file(
            well_field=well_field,
            udf_vf_dict=udf_vf_dict,
            well_dict=well_dict,
            result_file=result_file,
        )

    if error_messages:
        # dict.fromkeys removes duplicates while keeping first-seen order, so the
        # reported message is the same on every run (a set gives arbitrary order).
        raise MissingArtifactError(" ".join(dict.fromkeys(error_messages)))


@click.command()
@options.file_placeholders(help="File placeholder name.")
@options.local_files()
@options.udf_values()
@options.well_fields()
@options.value_fields()
@options.input()
@click.pass_context
def csv_well_to_udf(
    ctx,
    files: Tuple[str],
    local_files: Tuple[str],
    udf_values: Tuple[str],
    well_fields: Tuple[str],
    value_fields: Tuple[str],
    input: bool,
):
    """Script to copy data from files to UDFs based on well position."""
    # NOTE: the docstring above is also the command's help text, so it is kept as is.

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]

    try:
        artifacts_by_well: Dict[str, Artifact] = create_well_dict(
            process=process, input_flag=input
        )
        field_by_udf: Dict[str, str] = make_udf_dict(udfs=udf_values, value_fields=value_fields)
        # click hands over tuples; the helper works on lists.
        set_udfs(
            well_fields=[*well_fields],
            udf_vf_dict=field_by_udf,
            well_dict=artifacts_by_well,
            file_placeholders=[*files],
            local_files=[*local_files],
            process=process,
        )
    except LimsError as e:
        # Exit with the error message instead of a traceback.
        sys.exit(e.message)
    else:
        click.echo("The UDFs were successfully populated.")
80 changes: 80 additions & 0 deletions cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import logging
import sys
from pathlib import Path
from typing import Dict

import click
import pandas as pd
from cg_lims import options
from cg_lims.exceptions import LimsError, MissingArtifactError, MissingFileError
from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name
from cg_lims.get.files import get_file_path
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def set_udfs(udf: str, well_dict: dict, result_file: Path):
    """Reads the Quant-iT Excel file and sets the value for each sample

    The first 11 rows of the sheet are skipped and no header row is used. For each
    remaining row, column 0 is used as the well and column 2 as the value written
    to the UDF named `udf` on the artifact found in `well_dict` for that well.

    Raises:
        MissingArtifactError: if any row was skipped, either because its well is not
            in `well_dict` or because its value is blank/NaN. Rows that could be
            processed have already been updated when this is raised.
    """

    failed_artifacts: int = 0
    skipped_artifacts: int = 0
    df: pd.DataFrame = pd.read_excel(result_file, skiprows=11, header=None)
    for _, row in df.iterrows():
        if row[0] not in well_dict:
            LOG.info(f"Well {row[0]} is not used by a sample in the step, skipping.")
            skipped_artifacts += 1
            continue
        elif pd.isna(row[2]):
            LOG.info(
                f"Well {row[0]} does not have a valid concentration value ({row[2]}), skipping."
            )
            failed_artifacts += 1
            continue
        artifact: Artifact = well_dict[row[0]]
        artifact.udf[udf] = row[2]
        artifact.put()

    if failed_artifacts or skipped_artifacts:
        error_message = "Warning:"
        if failed_artifacts:
            error_message += f" Skipped {failed_artifacts} artifact(s) with wrong and/or blank values for some UDFs."
        if skipped_artifacts:
            # Report the skipped count here; the failed count belongs to the message above.
            # NOTE(review): this counter holds file rows whose well is not in the step,
            # while the text talks about artifacts missing from the file - confirm intent.
            error_message += f" Skipped {skipped_artifacts} artifact(s) as they weren't represented in the result file."
        raise MissingArtifactError(error_message)


@click.command()
@options.file_placeholder(help="File placeholder name.")
@options.local_file()
@options.udf()
@options.input()
@click.pass_context
def quantit_excel_to_udf(
    ctx,
    file: str,
    local_file: str,
    udf: str,
    input: bool,
):
    """Script to copy data from a Quant-iT result Excel file to concentration UDFs based on well position"""
    # NOTE: the docstring above is also the command's help text, so it is kept as is.

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]

    try:
        # Resolve the file inside the try block so that a LimsError raised while
        # looking up the placeholder is reported as an exit message, like every
        # other failure of this command, instead of as a traceback.
        if local_file:
            file_path: str = local_file
        else:
            file_art: Artifact = get_artifact_by_name(process=process, name=file)
            file_path: str = get_file_path(file_art)

        if not Path(file_path).is_file():
            raise MissingFileError(f"No such file: {file_path}")
        well_dict: Dict[str, Artifact] = create_well_dict(
            process=process, input_flag=input, quantit_well_format=True
        )
        set_udfs(udf=udf, well_dict=well_dict, result_file=Path(file_path))
        click.echo(f"Updated {len(well_dict)} artifact(s) successfully.")
    except LimsError as e:
        sys.exit(e.message)
Loading
Loading