From 187d09820d67b0751da420a2a35d5b454fd18cac Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Fri, 13 Dec 2024 14:55:32 +0100 Subject: [PATCH 01/25] New Femtopulse csv file script --- cg_lims/EPPs/files/base.py | 2 + cg_lims/EPPs/files/femtopulse_csv.py | 90 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 cg_lims/EPPs/files/femtopulse_csv.py diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index 94e1a27d..28aed224 100644 --- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -12,6 +12,7 @@ from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters +from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv @click.group(invoke_without_command=True) @@ -31,3 +32,4 @@ def files(ctx): files.add_command(create_sample_sheet) files.add_command(parse_run_parameters) files.add_command(parse_ont_report) +files.add_command(make_femtopulse_csv) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py new file mode 100644 index 00000000..d463ec25 --- /dev/null +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -0,0 +1,90 @@ +import logging +import sys +from pathlib import Path +from typing import List + +import click +import pandas as pd +from cg_lims import options +from cg_lims.exceptions import InvalidValueError, LimsError +from cg_lims.get.artifacts import get_artifacts +from genologics.lims import Artifact + +LOG = logging.getLogger(__name__) + +ROWS = list(range(1,13)) # list numbered 1 to 12 +WELL_POSITIONS = [f"A{i}" for i in range(1,13)] # list with well positions A1-A12 +SAMPLE_NAMES = [""] * 12 # List with twelve empty positions for sample names + + +def parse_well(artifact_position: str) -> str: + """Convert position from format 'A:1' to 'A1'.""" + try: + row, col = 
artifact_position.split(":") + return f"{row}{col}" + except Exception: + return None + + +def get_data_and_write( + artifacts: List[Artifact], + file: str +): + """Make a csv file for a Femtopulse run start with three columns: + one numbered 1-12, one with the sample position/well for the run and + a column with the sample name or ladder (in the 12th position).""" + + for artifact in artifacts: + + artifact_name: str = artifact.samples[0].name + artifact_well: str = artifact.location[1] + + # Convert sample well format from 'A:1' to 'A1' + parsed_well: str = parse_well(artifact_well) + + # Checks that the sample well matches with one in the WELL_POSITIONS list and is A1-A11 + if parsed_well in WELL_POSITIONS: + index: int = WELL_POSITIONS.index(parsed_well) + if index < 11: + SAMPLE_NAMES[index] = artifact_name + else: + raise InvalidValueError(f"Position {parsed_well} reserved for 'ladder', skipped.") + else: + raise InvalidValueError(f"Not possible position ({parsed_well}) for {artifact_name}, skipped.") + + # The ladder will always be in well A12 + SAMPLE_NAMES[-1] = "ladder" + + # Create the csv file + df = pd.DataFrame({ + 0: ROWS, + 1: WELL_POSITIONS, + 2: SAMPLE_NAMES + }) + print(df) + print(file) + df.to_csv(Path(file), index=False, header=False) + +@click.command() +@options.file_placeholder() +@options.measurement() +@click.pass_context +def make_femtopulse_csv( + ctx: click.Context, + file: str, + measurement: bool, +): + """Script to make a csv file for a Femtopulse run""" + + LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") + process = ctx.obj["process"] + artifacts = get_artifacts(process=process, measurement=measurement) + + try: + get_data_and_write( + artifacts=artifacts, + file=f"{process.id}-{file}.csv", + ) + click.echo("The file was successfully generated.") + except LimsError as e: + sys.exit(e.message) \ No newline at end of file From a86a2cab79cbfcb74cb8b27d1f45496086e468c9 Mon Sep 17 00:00:00 2001 From: idalindegaard 
Date: Fri, 13 Dec 2024 15:02:56 +0100 Subject: [PATCH 02/25] clean up the code a bit --- cg_lims/EPPs/files/femtopulse_csv.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index d463ec25..29b8e998 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -12,8 +12,8 @@ LOG = logging.getLogger(__name__) -ROWS = list(range(1,13)) # list numbered 1 to 12 -WELL_POSITIONS = [f"A{i}" for i in range(1,13)] # list with well positions A1-A12 +ROWS = list(range(1,13)) # List numbered 1 to 12 +WELL_POSITIONS = [f"A{i}" for i in range(1,13)] # List with well positions A1-A12 SAMPLE_NAMES = [""] * 12 # List with twelve empty positions for sample names @@ -61,8 +61,6 @@ def get_data_and_write( 1: WELL_POSITIONS, 2: SAMPLE_NAMES }) - print(df) - print(file) df.to_csv(Path(file), index=False, header=False) @click.command() From ed5d0cfc18ba2d898a0cd6fa0331fa14ab05259d Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Fri, 13 Dec 2024 15:27:07 +0100 Subject: [PATCH 03/25] updated file name --- cg_lims/EPPs/files/femtopulse_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 29b8e998..12838cb6 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -63,6 +63,7 @@ def get_data_and_write( }) df.to_csv(Path(file), index=False, header=False) + @click.command() @options.file_placeholder() @options.measurement() @@ -81,7 +82,7 @@ def make_femtopulse_csv( try: get_data_and_write( artifacts=artifacts, - file=f"{process.id}-{file}.csv", + file=f"{file}.csv", ) click.echo("The file was successfully generated.") except LimsError as e: From 64c2956ede1fd4b6e40cf7c84db4bd75238537b1 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Fri, 13 Dec 2024 15:29:09 +0100 Subject: [PATCH 04/25] updated file name again --- 
cg_lims/EPPs/files/femtopulse_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 12838cb6..53368a33 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -82,7 +82,7 @@ def make_femtopulse_csv( try: get_data_and_write( artifacts=artifacts, - file=f"{file}.csv", + file=f"{file}_femtopulse.csv", ) click.echo("The file was successfully generated.") except LimsError as e: From 1a4a9c8764fc1d239766fd27d5bc8d0b6a21771c Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Fri, 13 Dec 2024 15:53:40 +0100 Subject: [PATCH 05/25] ran isort --- cg_lims/EPPs/files/femtopulse_csv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 53368a33..fe11c197 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -5,10 +5,11 @@ import click import pandas as pd +from genologics.lims import Artifact + from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts -from genologics.lims import Artifact LOG = logging.getLogger(__name__) From 1df87aff28c8c84a0e2f4ecd4f12ab3606eb90c7 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Mon, 16 Dec 2024 13:56:36 +0100 Subject: [PATCH 06/25] updated error handling --- cg_lims/EPPs/files/femtopulse_csv.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index fe11c197..bb346c3c 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -35,6 +35,8 @@ def get_data_and_write( one numbered 1-12, one with the sample position/well for the run and a column with the sample name or ladder (in the 12th position).""" + failed_samples: list = [] + for artifact in 
artifacts: artifact_name: str = artifact.samples[0].name @@ -49,9 +51,20 @@ def get_data_and_write( if index < 11: SAMPLE_NAMES[index] = artifact_name else: - raise InvalidValueError(f"Position {parsed_well} reserved for 'ladder', skipped.") + failed_samples.append({"artifact_name": artifact_name, + "parsed_well": parsed_well, + "error": "This position is reserved for the ladder"}) else: - raise InvalidValueError(f"Not possible position ({parsed_well}) for {artifact_name}, skipped.") + failed_samples.append({"artifact_name": artifact_name, + "parsed_well": parsed_well, + "error": "Position is not possible for the run"}) + + if failed_samples: + error_message: str = "\n".join( + f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: {sample['error']}" + for sample in failed_samples + ) + raise InvalidValueError(f"Errors found:\n{error_message}") # The ladder will always be in well A12 SAMPLE_NAMES[-1] = "ladder" From aa06f8ef6d3cfd16c44f13714c987de97a2099df Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Mon, 16 Dec 2024 14:34:53 +0100 Subject: [PATCH 07/25] updated error handling again --- cg_lims/EPPs/files/femtopulse_csv.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index bb346c3c..6897f63f 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -60,11 +60,15 @@ def get_data_and_write( "error": "Position is not possible for the run"}) if failed_samples: - error_message: str = "\n".join( - f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: {sample['error']}" - for sample in failed_samples - ) - raise InvalidValueError(f"Errors found:\n{error_message}") + error_index: int = 0 + for sample in failed_samples: + error_message: str = f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: {sample['error']}" + if error_index < 1: + all_errors: str = error_message 
+ error_index =+ 1 + else: + all_errors = all_errors + '\n' + error_message + raise InvalidValueError(f"Errors found:\n{all_errors}") # The ladder will always be in well A12 SAMPLE_NAMES[-1] = "ladder" From b3ac98028923f81d0461bd44a9771fcd4a70619b Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Mon, 16 Dec 2024 16:12:59 +0100 Subject: [PATCH 08/25] finishing touches --- cg_lims/EPPs/files/femtopulse_csv.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 6897f63f..0aa0e987 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -42,10 +42,11 @@ def get_data_and_write( artifact_name: str = artifact.samples[0].name artifact_well: str = artifact.location[1] - # Convert sample well format from 'A:1' to 'A1' + # Converts sample well format from 'A:1' to 'A1' parsed_well: str = parse_well(artifact_well) - # Checks that the sample well matches with one in the WELL_POSITIONS list and is A1-A11 + # Checks that the sample well matches with one in the WELL_POSITIONS list (A1-A11) + # and adds the sample name to the SAMPLE_NAMES list for that position if parsed_well in WELL_POSITIONS: index: int = WELL_POSITIONS.index(parsed_well) if index < 11: @@ -53,12 +54,13 @@ def get_data_and_write( else: failed_samples.append({"artifact_name": artifact_name, "parsed_well": parsed_well, - "error": "This position is reserved for the ladder"}) + "error": "This position is reserved for the ladder."}) else: failed_samples.append({"artifact_name": artifact_name, "parsed_well": parsed_well, - "error": "Position is not possible for the run"}) + "error": "Position is not possible for the run."}) + # Prints out error message(s) if failed_samples: error_index: int = 0 for sample in failed_samples: @@ -67,8 +69,8 @@ def get_data_and_write( all_errors: str = error_message error_index =+ 1 else: - all_errors = all_errors + '\n' + error_message - 
raise InvalidValueError(f"Errors found:\n{all_errors}") + all_errors = all_errors + ' ' + error_message + raise InvalidValueError(f"Errors found: {all_errors}") # The ladder will always be in well A12 SAMPLE_NAMES[-1] = "ladder" From dc9ebcdbc6484696390430e4f2ba1bdad9dea6b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Thu, 19 Dec 2024 17:56:18 +0100 Subject: [PATCH 09/25] black and isort --- cg_lims/EPPs/files/base.py | 2 +- cg_lims/EPPs/files/femtopulse_csv.py | 54 ++++++++++++++-------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index a7909d12..6f0d142b 100644 --- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -2,6 +2,7 @@ import click from cg_lims.EPPs.files.barcode_tubes import make_barcode_csv from cg_lims.EPPs.files.csv_for_kapa_truble_shooting.csv_for_kapa_debug import trouble_shoot_kapa +from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv # commands from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf @@ -14,7 +15,6 @@ from cg_lims.EPPs.files.smrt_link.run_design import create_smrtlink_run_design from cg_lims.EPPs.files.smrt_link.sample_setup import create_smrtlink_sample_setup from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters -from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv @click.group(invoke_without_command=True) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 0aa0e987..6a0c34d4 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -5,17 +5,16 @@ import click import pandas as pd -from genologics.lims import Artifact - from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts +from genologics.lims import Artifact LOG = logging.getLogger(__name__) -ROWS = list(range(1,13)) # List numbered 1 to 12 -WELL_POSITIONS = 
[f"A{i}" for i in range(1,13)] # List with well positions A1-A12 -SAMPLE_NAMES = [""] * 12 # List with twelve empty positions for sample names +ROWS = list(range(1, 13)) # List numbered 1 to 12 +WELL_POSITIONS = [f"A{i}" for i in range(1, 13)] # List with well positions A1-A12 +SAMPLE_NAMES = [""] * 12 # List with twelve empty positions for sample names def parse_well(artifact_position: str) -> str: @@ -27,12 +26,9 @@ def parse_well(artifact_position: str) -> str: return None -def get_data_and_write( - artifacts: List[Artifact], - file: str -): - """Make a csv file for a Femtopulse run start with three columns: - one numbered 1-12, one with the sample position/well for the run and +def get_data_and_write(artifacts: List[Artifact], file: str): + """Make a csv file for a Femtopulse run start with three columns: + one numbered 1-12, one with the sample position/well for the run and a column with the sample name or ladder (in the 12th position).""" failed_samples: list = [] @@ -52,35 +48,41 @@ def get_data_and_write( if index < 11: SAMPLE_NAMES[index] = artifact_name else: - failed_samples.append({"artifact_name": artifact_name, - "parsed_well": parsed_well, - "error": "This position is reserved for the ladder."}) + failed_samples.append( + { + "artifact_name": artifact_name, + "parsed_well": parsed_well, + "error": "This position is reserved for the ladder.", + } + ) else: - failed_samples.append({"artifact_name": artifact_name, - "parsed_well": parsed_well, - "error": "Position is not possible for the run."}) + failed_samples.append( + { + "artifact_name": artifact_name, + "parsed_well": parsed_well, + "error": "Position is not possible for the run.", + } + ) # Prints out error message(s) if failed_samples: error_index: int = 0 for sample in failed_samples: - error_message: str = f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: {sample['error']}" + error_message: str = ( + f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: 
{sample['error']}" + ) if error_index < 1: all_errors: str = error_message - error_index =+ 1 + error_index = +1 else: - all_errors = all_errors + ' ' + error_message + all_errors = all_errors + " " + error_message raise InvalidValueError(f"Errors found: {all_errors}") # The ladder will always be in well A12 SAMPLE_NAMES[-1] = "ladder" # Create the csv file - df = pd.DataFrame({ - 0: ROWS, - 1: WELL_POSITIONS, - 2: SAMPLE_NAMES - }) + df = pd.DataFrame({0: ROWS, 1: WELL_POSITIONS, 2: SAMPLE_NAMES}) df.to_csv(Path(file), index=False, header=False) @@ -106,4 +108,4 @@ def make_femtopulse_csv( ) click.echo("The file was successfully generated.") except LimsError as e: - sys.exit(e.message) \ No newline at end of file + sys.exit(e.message) From 8b30f0bae224a297ee1e6b5e8bad9d5a1148f77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Thu, 19 Dec 2024 17:57:44 +0100 Subject: [PATCH 10/25] more isort for some reason --- cg_lims/EPPs/files/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index 6f0d142b..e5cbef56 100644 --- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -3,8 +3,6 @@ from cg_lims.EPPs.files.barcode_tubes import make_barcode_csv from cg_lims.EPPs.files.csv_for_kapa_truble_shooting.csv_for_kapa_debug import trouble_shoot_kapa from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv - -# commands from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf from cg_lims.EPPs.files.hamilton.base import hamilton from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report From e2d805944bc636781203e40c2d11c4e522c0c677 Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Tue, 7 Jan 2025 16:15:24 +0100 Subject: [PATCH 11/25] several changes after feedback --- cg_lims/EPPs/files/femtopulse_csv.py | 62 ++++++++++++++-------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py 
b/cg_lims/EPPs/files/femtopulse_csv.py index 6a0c34d4..756d77a8 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -5,25 +5,30 @@ import click import pandas as pd +from genologics.lims import Artifact + from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts -from genologics.lims import Artifact +from cg_lims.get.fields import get_artifact_well LOG = logging.getLogger(__name__) -ROWS = list(range(1, 13)) # List numbered 1 to 12 + WELL_POSITIONS = [f"A{i}" for i in range(1, 13)] # List with well positions A1-A12 -SAMPLE_NAMES = [""] * 12 # List with twelve empty positions for sample names +SAMPLE_NAMES = [""] * len( + WELL_POSITIONS +) # List with twelve empty positions for sample names +DATAFRAME = pd.DataFrame( + {"well positions": WELL_POSITIONS, "sample names": SAMPLE_NAMES} +) # Dataframe with well positions and sample names -def parse_well(artifact_position: str) -> str: - """Convert position from format 'A:1' to 'A1'.""" - try: - row, col = artifact_position.split(":") - return f"{row}{col}" - except Exception: - return None +def get_sample_artifact_name(artifact: Artifact): + + artifact_name: str = artifact.samples[0].name + + return artifact_name def get_data_and_write(artifacts: List[Artifact], file: str): @@ -35,55 +40,52 @@ def get_data_and_write(artifacts: List[Artifact], file: str): for artifact in artifacts: - artifact_name: str = artifact.samples[0].name - artifact_well: str = artifact.location[1] + artifact_name: str = get_sample_artifact_name(artifact=artifact) + artifact_well: str = get_artifact_well(artifact=artifact) # Converts sample well format from 'A:1' to 'A1' - parsed_well: str = parse_well(artifact_well) + #parsed_well: str = parse_well(artifact_well) # Checks that the sample well matches with one in the WELL_POSITIONS list (A1-A11) # and adds the sample name to the SAMPLE_NAMES list for that position - if 
parsed_well in WELL_POSITIONS: - index: int = WELL_POSITIONS.index(parsed_well) - if index < 11: - SAMPLE_NAMES[index] = artifact_name - else: + if artifact_well in DATAFRAME["well positions"].values: + if artifact_well == DATAFRAME["well positions"].iloc[-1]: failed_samples.append( { "artifact_name": artifact_name, - "parsed_well": parsed_well, + "parsed_well": artifact_well, "error": "This position is reserved for the ladder.", } ) + else: + DATAFRAME.loc[ + DATAFRAME["well positions"] == artifact_well, "sample names" + ] = artifact_name else: failed_samples.append( { "artifact_name": artifact_name, - "parsed_well": parsed_well, - "error": "Position is not possible for the run.", + "parsed_well": artifact_well, + "error": "This position is not possible for the run.", } ) # Prints out error message(s) if failed_samples: - error_index: int = 0 + all_errors = "" for sample in failed_samples: error_message: str = ( f"Sample {sample['artifact_name']} in position {sample['parsed_well']}: {sample['error']}" ) - if error_index < 1: - all_errors: str = error_message - error_index = +1 - else: - all_errors = all_errors + " " + error_message + all_errors = all_errors + " " + error_message raise InvalidValueError(f"Errors found: {all_errors}") # The ladder will always be in well A12 - SAMPLE_NAMES[-1] = "ladder" + DATAFRAME["sample names"].iloc[-1] = "ladder" # Create the csv file - df = pd.DataFrame({0: ROWS, 1: WELL_POSITIONS, 2: SAMPLE_NAMES}) - df.to_csv(Path(file), index=False, header=False) + DATAFRAME.to_csv(Path(file), index=True, header=False) + DATAFRAME.index = range(1, len(DATAFRAME) + 1) @click.command() From edb580a1d59f96bb9aad7f4dfc8e5936a3cd8d3b Mon Sep 17 00:00:00 2001 From: idalindegaard Date: Tue, 7 Jan 2025 16:28:13 +0100 Subject: [PATCH 12/25] index range fix --- cg_lims/EPPs/files/femtopulse_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index 
756d77a8..b07cb198 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -84,8 +84,8 @@ def get_data_and_write(artifacts: List[Artifact], file: str): DATAFRAME["sample names"].iloc[-1] = "ladder" # Create the csv file - DATAFRAME.to_csv(Path(file), index=True, header=False) DATAFRAME.index = range(1, len(DATAFRAME) + 1) + DATAFRAME.to_csv(Path(file), index=True, header=False) @click.command() From 7fa34cee47db5ef9a844829b63cbb2baedf2a456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Tue, 7 Jan 2025 17:00:04 +0100 Subject: [PATCH 13/25] sneak edits --- cg_lims/EPPs/files/femtopulse_csv.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/cg_lims/EPPs/files/femtopulse_csv.py b/cg_lims/EPPs/files/femtopulse_csv.py index b07cb198..437e0eec 100644 --- a/cg_lims/EPPs/files/femtopulse_csv.py +++ b/cg_lims/EPPs/files/femtopulse_csv.py @@ -5,20 +5,17 @@ import click import pandas as pd -from genologics.lims import Artifact - from cg_lims import options from cg_lims.exceptions import InvalidValueError, LimsError from cg_lims.get.artifacts import get_artifacts from cg_lims.get.fields import get_artifact_well +from genologics.lims import Artifact LOG = logging.getLogger(__name__) WELL_POSITIONS = [f"A{i}" for i in range(1, 13)] # List with well positions A1-A12 -SAMPLE_NAMES = [""] * len( - WELL_POSITIONS -) # List with twelve empty positions for sample names +SAMPLE_NAMES = [""] * len(WELL_POSITIONS) # List with twelve empty positions for sample names DATAFRAME = pd.DataFrame( {"well positions": WELL_POSITIONS, "sample names": SAMPLE_NAMES} ) # Dataframe with well positions and sample names @@ -41,10 +38,9 @@ def get_data_and_write(artifacts: List[Artifact], file: str): for artifact in artifacts: artifact_name: str = get_sample_artifact_name(artifact=artifact) - artifact_well: str = get_artifact_well(artifact=artifact) - # Converts sample well format from 'A:1' to 'A1' - 
#parsed_well: str = parse_well(artifact_well) + # Fetch sample well in format 'A1' + artifact_well: str = get_artifact_well(artifact=artifact) # Checks that the sample well matches with one in the WELL_POSITIONS list (A1-A11) # and adds the sample name to the SAMPLE_NAMES list for that position @@ -58,9 +54,9 @@ def get_data_and_write(artifacts: List[Artifact], file: str): } ) else: - DATAFRAME.loc[ - DATAFRAME["well positions"] == artifact_well, "sample names" - ] = artifact_name + DATAFRAME.loc[DATAFRAME["well positions"] == artifact_well, "sample names"] = ( + artifact_name + ) else: failed_samples.append( { From d93460e8730a4a0f3b4a956656ea99ab6464475f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 09:39:34 +0100 Subject: [PATCH 14/25] renamed xml parser --- cg_lims/EPPs/files/base.py | 2 +- cg_lims/EPPs/files/{xml_to_udf.py => illumina_xml_to_udf.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cg_lims/EPPs/files/{xml_to_udf.py => illumina_xml_to_udf.py} (100%) diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index e5cbef56..fd57065a 100644 --- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -5,6 +5,7 @@ from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf from cg_lims.EPPs.files.hamilton.base import hamilton +from cg_lims.EPPs.files.illumina_xml_to_udf import parse_run_parameters from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map @@ -12,7 +13,6 @@ from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet from cg_lims.EPPs.files.smrt_link.run_design import create_smrtlink_run_design from cg_lims.EPPs.files.smrt_link.sample_setup import create_smrtlink_sample_setup -from cg_lims.EPPs.files.xml_to_udf import 
parse_run_parameters @click.group(invoke_without_command=True) diff --git a/cg_lims/EPPs/files/xml_to_udf.py b/cg_lims/EPPs/files/illumina_xml_to_udf.py similarity index 100% rename from cg_lims/EPPs/files/xml_to_udf.py rename to cg_lims/EPPs/files/illumina_xml_to_udf.py From 4cb6ce505ce52766721f96c00f6b50c842615dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 09:41:57 +0100 Subject: [PATCH 15/25] moved all parsers into new sub dir --- cg_lims/EPPs/files/base.py | 6 +++--- cg_lims/EPPs/files/parsers/__init__.py | 0 cg_lims/EPPs/files/{ => parsers}/file_to_udf.py | 0 cg_lims/EPPs/files/{ => parsers}/illumina_xml_to_udf.py | 0 cg_lims/EPPs/files/{ => parsers}/ont_json_to_udf.py | 0 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 cg_lims/EPPs/files/parsers/__init__.py rename cg_lims/EPPs/files/{ => parsers}/file_to_udf.py (100%) rename cg_lims/EPPs/files/{ => parsers}/illumina_xml_to_udf.py (100%) rename cg_lims/EPPs/files/{ => parsers}/ont_json_to_udf.py (100%) diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index fd57065a..15236771 100644 --- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -3,10 +3,10 @@ from cg_lims.EPPs.files.barcode_tubes import make_barcode_csv from cg_lims.EPPs.files.csv_for_kapa_truble_shooting.csv_for_kapa_debug import trouble_shoot_kapa from cg_lims.EPPs.files.femtopulse_csv import make_femtopulse_csv -from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf from cg_lims.EPPs.files.hamilton.base import hamilton -from cg_lims.EPPs.files.illumina_xml_to_udf import parse_run_parameters -from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report +from cg_lims.EPPs.files.parsers.file_to_udf import csv_well_to_udf +from cg_lims.EPPs.files.parsers.illumina_xml_to_udf import parse_run_parameters +from cg_lims.EPPs.files.parsers.ont_json_to_udf import parse_ont_report from cg_lims.EPPs.files.placement_map.make_96well_placement_map import 
placement_map from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet diff --git a/cg_lims/EPPs/files/parsers/__init__.py b/cg_lims/EPPs/files/parsers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cg_lims/EPPs/files/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py similarity index 100% rename from cg_lims/EPPs/files/file_to_udf.py rename to cg_lims/EPPs/files/parsers/file_to_udf.py diff --git a/cg_lims/EPPs/files/illumina_xml_to_udf.py b/cg_lims/EPPs/files/parsers/illumina_xml_to_udf.py similarity index 100% rename from cg_lims/EPPs/files/illumina_xml_to_udf.py rename to cg_lims/EPPs/files/parsers/illumina_xml_to_udf.py diff --git a/cg_lims/EPPs/files/ont_json_to_udf.py b/cg_lims/EPPs/files/parsers/ont_json_to_udf.py similarity index 100% rename from cg_lims/EPPs/files/ont_json_to_udf.py rename to cg_lims/EPPs/files/parsers/ont_json_to_udf.py From c07e94e770e9be7bcd24ae5ff42e0b70796c71dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 11:16:39 +0100 Subject: [PATCH 16/25] add new common func for creating well/artifact dicts --- .../files/parsers/quantit_excel_to_udf.py | 13 +++++++++ cg_lims/get/artifacts.py | 29 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py diff --git a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py new file mode 100644 index 00000000..61928321 --- /dev/null +++ b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py @@ -0,0 +1,13 @@ +import csv +import logging +import sys +from pathlib import Path + +import click +from cg_lims import options +from cg_lims.exceptions import LimsError, MissingArtifactError, MissingFileError +from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name +from cg_lims.get.files import get_file_path +from 
genologics.entities import Artifact + +LOG = logging.getLogger(__name__) diff --git a/cg_lims/get/artifacts.py b/cg_lims/get/artifacts.py index 2568b447..17a1eb42 100644 --- a/cg_lims/get/artifacts.py +++ b/cg_lims/get/artifacts.py @@ -245,3 +245,32 @@ def get_non_pooled_artifacts(artifact: Artifact) -> List[Artifact]: for artifact in artifact.input_artifact_list(): artifacts.extend(get_non_pooled_artifacts(artifact)) return artifacts + + +def create_well_dict( + process: Process, + input_flag: bool = False, + native_well_format: bool = False, +) -> Dict[str, Artifact]: + """Creates a well dict based on the input_output_map + keys: well of input artifact + values: input/output artifact depending on the input flag + """ + + well_dict: Dict[str, Artifact] = {} + lims: Lims = process.lims + for input, output in process.input_output_maps: + if output.get("output-generation-type") == "PerAllInputs": + continue + source_artifact: Artifact = ( + Artifact(lims, id=input["limsid"]) + if input_flag + else Artifact(lims, id=output["limsid"]) + ) + well: str = ( + source_artifact.location[1].replace(":", "") + if not native_well_format + else source_artifact.location[1] + ) + well_dict[well] = source_artifact + return well_dict From bf2ff8a6939d8a914a7ce3e6195c9f77f5f09b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 11:27:59 +0100 Subject: [PATCH 17/25] add type hints and a bit of refactoring to file_to_udf --- cg_lims/EPPs/files/parsers/file_to_udf.py | 57 ++++++++--------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py index 9e44396b..9513b610 100644 --- a/cg_lims/EPPs/files/parsers/file_to_udf.py +++ b/cg_lims/EPPs/files/parsers/file_to_udf.py @@ -4,46 +4,28 @@ import logging import sys from pathlib import Path +from typing import Any, Dict, List import click from cg_lims import options from cg_lims.exceptions import LimsError, 
MissingArtifactError, MissingFileError -from cg_lims.get.artifacts import get_artifact_by_name +from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name from cg_lims.get.files import get_file_path -from genologics.entities import Artifact +from genologics.entities import Artifact, Process LOG = logging.getLogger(__name__) -def make_well_dict(process, lims, input): - """Creates a well dict based on input_output_map - keys: well of input artifact - values: input/output artifact depending on the input flag - """ - - well_dict = {} - for inp, outp in process.input_output_maps: - if outp.get("output-generation-type") == "PerAllInputs": - continue - in_art = Artifact(lims, id=inp["limsid"]) - out_art = Artifact(lims, id=outp["limsid"]) - source_art = in_art if input == True else out_art - col, row = source_art.location[1].split(":") - well = col + row - well_dict[well] = out_art - return well_dict - - def set_udfs(well_field: str, value_field: str, udf: str, well_dict: dict, result_file: Path): """Reads the csv and sets the value for each sample""" - error_msg = [] - passed_arts = 0 + error_msg: List[str] = [] + passed_arts: int = 0 with open(result_file, newline="", encoding="latin1") as csvfile: - reader = csv.DictReader(csvfile) + reader: csv.DictReader = csv.DictReader(csvfile) for sample in reader: - well = sample.get(well_field) - value = sample.get(value_field) + well: str = sample.get(well_field) + value: Any = sample.get(value_field) if value is None: error_msg.append("Some samples in the file had missing values.") LOG.info(f"Missing value for sample {sample} in well {well}. Skipping!") @@ -51,18 +33,18 @@ def set_udfs(well_field: str, value_field: str, udf: str, well_dict: dict, resul elif well not in well_dict: LOG.info(f"Well {well} was not found in the step. 
Skipping!") continue - art = well_dict[well] + artifact: Artifact = well_dict[well] try: - art.udf[udf] = str(value) + artifact.udf[udf] = str(value) except: - art.udf[udf] = float(value) - art.put() + artifact.udf[udf] = float(value) + artifact.put() passed_arts += 1 if passed_arts < len(well_dict.keys()): error_msg.append("Some samples in the step were not represented in the file.") - error_string = " ".join(list(set(error_msg))) + error_string: str = " ".join(list(set(error_msg))) if error_msg: raise MissingArtifactError(error_string) @@ -81,20 +63,19 @@ def csv_well_to_udf( """Script to copy data from file to udf based on well position""" LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") - process = ctx.obj["process"] - lims = ctx.obj["lims"] + process: Process = ctx.obj["process"] if local_file: - file_path = local_file + file_path: str = local_file else: - file_art = get_artifact_by_name(process=process, name=file) - file_path = get_file_path(file_art) + file_art: Artifact = get_artifact_by_name(process=process, name=file) + file_path: str = get_file_path(file_art) try: if not Path(file_path).is_file(): raise MissingFileError(f"No such file: {file_path}") - well_dict = make_well_dict(process, lims, input) - set_udfs(well_field, value_field, udf, well_dict, file_path) + well_dict: Dict[str, Artifact] = create_well_dict(process=process, input_flag=input) + set_udfs(well_field, value_field, udf, well_dict, Path(file_path)) click.echo("The udfs were sucessfully populated.") except LimsError as e: sys.exit(e.message) From 75f60dabbe345a06eca605a3f57a24e86cb15c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 15:14:17 +0100 Subject: [PATCH 18/25] adding more code --- cg_lims/EPPs/files/base.py | 2 + .../files/parsers/quantit_excel_to_udf.py | 64 ++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/cg_lims/EPPs/files/base.py b/cg_lims/EPPs/files/base.py index 15236771..54df436a 100644 
--- a/cg_lims/EPPs/files/base.py +++ b/cg_lims/EPPs/files/base.py @@ -7,6 +7,7 @@ from cg_lims.EPPs.files.parsers.file_to_udf import csv_well_to_udf from cg_lims.EPPs.files.parsers.illumina_xml_to_udf import parse_run_parameters from cg_lims.EPPs.files.parsers.ont_json_to_udf import parse_ont_report +from cg_lims.EPPs.files.parsers.quantit_excel_to_udf import quantit_excel_to_udf from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet @@ -35,3 +36,4 @@ def files(ctx): files.add_command(make_femtopulse_csv) files.add_command(create_smrtlink_sample_setup) files.add_command(create_smrtlink_run_design) +files.add_command(quantit_excel_to_udf) diff --git a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py index 61928321..fb151f58 100644 --- a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py +++ b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py @@ -1,13 +1,73 @@ -import csv import logging import sys from pathlib import Path +from typing import Dict import click +import pandas as pd from cg_lims import options from cg_lims.exceptions import LimsError, MissingArtifactError, MissingFileError from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name from cg_lims.get.files import get_file_path -from genologics.entities import Artifact +from genologics.entities import Artifact, Process LOG = logging.getLogger(__name__) + + +def set_udfs(udf: str, well_dict: dict, result_file: Path): + """Reads the Quant-iT Excel file and sets the value for each sample""" + + failed_artifacts: int = 0 + df: pd.DataFrame = pd.read_excel(result_file, skiprows=11, header=None) + for index, row in df.iterrows(): + if row[0] not in well_dict.keys(): + LOG.info(f"Well {row[0]} is not used by a sample in the step, skipping.") + continue + 
elif pd.isna(row[2]): + LOG.info( + f"Well {row[0]} does not have a valid concentration value ({row[2]}), skipping." + ) + failed_artifacts += 1 + continue + artifact: Artifact = well_dict[row[0]] + artifact.udf[udf] = row[2] + artifact.put() + + if failed_artifacts: + raise MissingArtifactError( + f"Warning: Skipped {failed_artifacts} artifact(s) with wrong and/or blank values for some UDFs." + ) + + +@click.command() +@options.file_placeholder(help="File placeholder name.") +@options.local_file() +@options.udf() +@options.input() +@click.pass_context +def quantit_excel_to_udf( + ctx, + file: str, + local_file: str, + udf: str, + input: bool, +): + """Script to copy data from a Quant-iT result Excel file to concentration UDFs based on well position""" + + LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") + process: Process = ctx.obj["process"] + + if local_file: + file_path: str = local_file + else: + file_art: Artifact = get_artifact_by_name(process=process, name=file) + file_path: str = get_file_path(file_art) + + try: + if not Path(file_path).is_file(): + raise MissingFileError(f"No such file: {file_path}") + well_dict: Dict[str, Artifact] = create_well_dict(process=process, input_flag=input) + set_udfs(udf, well_dict, Path(file_path)) + click.echo(f"Updated {len(well_dict.keys())} artifact(s) successfully.") + except LimsError as e: + sys.exit(e.message) From f9b4c70ac1f2cda12ab3ed739445d8b9e5736d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Wed, 8 Jan 2025 17:10:31 +0100 Subject: [PATCH 19/25] some fixes and additions --- .../files/parsers/quantit_excel_to_udf.py | 6 ++-- cg_lims/get/artifacts.py | 29 +++++++++++-------- cg_lims/get/fields.py | 9 ++++++ 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py index fb151f58..9b7c35b2 100644 --- a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py +++ 
b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py @@ -66,8 +66,10 @@ def quantit_excel_to_udf( try: if not Path(file_path).is_file(): raise MissingFileError(f"No such file: {file_path}") - well_dict: Dict[str, Artifact] = create_well_dict(process=process, input_flag=input) - set_udfs(udf, well_dict, Path(file_path)) + well_dict: Dict[str, Artifact] = create_well_dict( + process=process, input_flag=input, quantit_well_format=True + ) + set_udfs(udf=udf, well_dict=well_dict, result_file=Path(file_path)) click.echo(f"Updated {len(well_dict.keys())} artifact(s) successfully.") except LimsError as e: sys.exit(e.message) diff --git a/cg_lims/get/artifacts.py b/cg_lims/get/artifacts.py index 17a1eb42..c05bb919 100644 --- a/cg_lims/get/artifacts.py +++ b/cg_lims/get/artifacts.py @@ -3,7 +3,8 @@ from enum import Enum from typing import Dict, List, Literal, Optional, Set, Tuple -from cg_lims.exceptions import FileError, MissingArtifactError +from cg_lims.exceptions import FileError, InvalidValueError, MissingArtifactError +from cg_lims.get.fields import get_artifact_well, get_quantit_artifact_well from genologics.entities import Artifact, Process, Sample from genologics.lims import Lims @@ -251,6 +252,7 @@ def create_well_dict( process: Process, input_flag: bool = False, native_well_format: bool = False, + quantit_well_format: bool = False, ) -> Dict[str, Artifact]: """Creates a well dict based on the input_output_map keys: well of input artifact @@ -262,15 +264,18 @@ def create_well_dict( for input, output in process.input_output_maps: if output.get("output-generation-type") == "PerAllInputs": continue - source_artifact: Artifact = ( - Artifact(lims, id=input["limsid"]) - if input_flag - else Artifact(lims, id=output["limsid"]) - ) - well: str = ( - source_artifact.location[1].replace(":", "") - if not native_well_format - else source_artifact.location[1] - ) - well_dict[well] = source_artifact + input_artifact = Artifact(lims, id=input["limsid"]) + output_artifact = 
Artifact(lims, id=output["limsid"]) + source_artifact: Artifact = input_artifact if input_flag else output_artifact + if native_well_format: + well: str = source_artifact.location[1] + elif quantit_well_format: + well: str = get_quantit_artifact_well(artifact=source_artifact) + else: + well: str = get_artifact_well(artifact=source_artifact) + if well in well_dict.keys(): + raise InvalidValueError( + f"Can't create dictionary! Well {well} is already used by another artifact." + ) + well_dict[well] = output_artifact return well_dict diff --git a/cg_lims/get/fields.py b/cg_lims/get/fields.py index 24f65442..b3fd6a9a 100644 --- a/cg_lims/get/fields.py +++ b/cg_lims/get/fields.py @@ -65,6 +65,15 @@ def get_artifact_well(artifact: Artifact) -> str: return location[1].replace(":", "") +def get_quantit_artifact_well(artifact: Artifact) -> str: + """Parsing out the well position from LocationDescriptor""" + + col, row = artifact.location[1].split(":") + if int(row) < 10: + row = "0" + row + return col + row + + def get_index_well(artifact: Artifact): """Parsing out the index well position from the reagent label string which typically looks like this: '44_A05 IDT_10nt_446 (AGCGTGACCT-CCATCCGAGT)' From 89e1dd63dd86115fdfe5a4ea159e34c878c63b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Thu, 9 Jan 2025 16:44:59 +0100 Subject: [PATCH 20/25] add more detailed error messages --- cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py index 9b7c35b2..a344a275 100644 --- a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py +++ b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py @@ -18,10 +18,12 @@ def set_udfs(udf: str, well_dict: dict, result_file: Path): """Reads the Quant-iT Excel file and sets the value for each sample""" failed_artifacts: int = 0 + skipped_artifacts: int = 0 
 df: pd.DataFrame = pd.read_excel(result_file, skiprows=11, header=None) for index, row in df.iterrows(): if row[0] not in well_dict.keys(): LOG.info(f"Well {row[0]} is not used by a sample in the step, skipping.") + skipped_artifacts += 1 continue elif pd.isna(row[2]): LOG.info( @@ -33,10 +35,13 @@ def set_udfs(udf: str, well_dict: dict, result_file: Path): artifact.udf[udf] = row[2] artifact.put() - if failed_artifacts: - raise MissingArtifactError( - f"Warning: Skipped {failed_artifacts} artifact(s) with wrong and/or blank values for some UDFs." - ) + if failed_artifacts or skipped_artifacts: + error_message = "Warning:" + if failed_artifacts: + error_message += f" Skipped {failed_artifacts} artifact(s) with wrong and/or blank values for some UDFs." + if skipped_artifacts: + error_message += f" Skipped {skipped_artifacts} artifact(s) as they weren't represented in the result file." + raise MissingArtifactError(error_message) @click.command() From 6b18bece9dab0ac1ec72aa9b9dbac7f7eabdd938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 10 Jan 2025 15:45:52 +0100 Subject: [PATCH 21/25] first set of changes to file2udf --- cg_lims/EPPs/files/parsers/file_to_udf.py | 148 ++++++++++++++++------ cg_lims/options.py | 18 ++- 2 files changed, 128 insertions(+), 38 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py index 9513b610..028eb87a 100644 --- a/cg_lims/EPPs/files/parsers/file_to_udf.py +++ b/cg_lims/EPPs/files/parsers/file_to_udf.py @@ -4,11 +4,11 @@ import logging import sys from pathlib import Path -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Tuple import click from cg_lims import options -from cg_lims.exceptions import LimsError, MissingArtifactError, MissingFileError +from cg_lims.exceptions import ArgumentError, LimsError, MissingArtifactError, MissingFileError from cg_lims.get.artifacts import create_well_dict, get_artifact_by_name from 
cg_lims.get.files import get_file_path from genologics.entities import Artifact, Process @@ -16,66 +16,140 @@ LOG = logging.getLogger(__name__) -def set_udfs(well_field: str, value_field: str, udf: str, well_dict: dict, result_file: Path): +def make_udf_dict(udfs: Tuple[str], value_fields: Tuple[str]) -> Dict[str, str]: + """""" + if len(udfs) != len(value_fields): + raise ArgumentError( + f"The number of artifact-udfs to update and file value fields must be the same." + ) + udf_vf_dict: dict = {} + for i in range(len(udfs)): + udf_vf_dict[udfs[i]] = value_fields[i] + return udf_vf_dict + + +def get_file_placeholder_paths(placeholder_names: List[str], process: Process) -> List[str]: + """""" + file_paths = [] + for placeholder_name in placeholder_names: + file_artifact: Artifact = get_artifact_by_name(process=process, name=placeholder_name) + file_paths.append(get_file_path(file_artifact=file_artifact)) + return file_paths + + +def set_udfs_from_file( + well_field: str, udf_vf_dict: Dict[str, str], well_dict: dict, result_file: Path +) -> List[str]: """Reads the csv and sets the value for each sample""" error_msg: List[str] = [] passed_arts: int = 0 with open(result_file, newline="", encoding="latin1") as csvfile: reader: csv.DictReader = csv.DictReader(csvfile) - for sample in reader: - well: str = sample.get(well_field) - value: Any = sample.get(value_field) - if value is None: - error_msg.append("Some samples in the file had missing values.") - LOG.info(f"Missing value for sample {sample} in well {well}. Skipping!") + for udf_name in list(udf_vf_dict.keys()): + if udf_vf_dict[udf_name] not in reader.fieldnames: + LOG.info( + f"Value {udf_vf_dict[udf_name]} does not exist in file {result_file}, skipping." + ) continue - elif well not in well_dict: - LOG.info(f"Well {well} was not found in the step. 
Skipping!") - continue - artifact: Artifact = well_dict[well] - try: - artifact.udf[udf] = str(value) - except: - artifact.udf[udf] = float(value) - artifact.put() - passed_arts += 1 + value_field: str = udf_vf_dict.pop(udf_name) + + for sample in reader: + well: str = sample.get(well_field) + if well not in well_dict: + LOG.info(f"Well {well} was not found in the step. Skipping!") + continue + artifact: Artifact = well_dict[well] + + value: Any = sample.get(value_field) + if value is None: + error_msg.append("Some samples in the file had missing values.") + LOG.info(f"Missing value for sample {sample} in well {well}. Skipping!") + continue + try: + artifact.udf[udf_name] = str(value) + except: + artifact.udf[udf_name] = float(value) + artifact.put() + passed_arts += 1 if passed_arts < len(well_dict.keys()): error_msg.append("Some samples in the step were not represented in the file.") - error_string: str = " ".join(list(set(error_msg))) - if error_msg: + return error_msg + + +def set_udfs( + well_fields: List[str], + udf_vf_dict: Dict[str, str], + well_dict: dict, + file_placeholders: List[str], + local_files: Optional[List[str]], + process: Process, +) -> None: + """""" + if local_files: + files: List[str] = local_files + else: + files: List[str] = get_file_placeholder_paths( + placeholder_names=file_placeholders, process=process + ) + if len(well_fields) != len(files): + raise ArgumentError(f"The number of files to read and file value fields must be the same.") + + file_well_list: zip = zip(files, well_fields) + error_message: List[str] = [] + + for file_tuple in file_well_list: + file: str = file_tuple[0] + well_field: str = file_tuple[1] + if not Path(file).is_file(): + raise MissingFileError(f"No such file: {file}") + error_message += set_udfs_from_file( + well_field=well_field, + udf_vf_dict=udf_vf_dict, + well_dict=well_dict, + result_file=Path(file), + ) + + if error_message: + error_string: str = " ".join(list(set(error_message))) raise 
MissingArtifactError(error_string) @click.command() -@options.file_placeholder(help="File placeholder name.") -@options.local_file() -@options.udf() -@options.well_field() -@options.value_field() +@options.file_placeholders(help="File placeholder name.") +@options.local_files() +@options.udf_values() +@options.well_fields() +@options.value_fields() @options.input() @click.pass_context def csv_well_to_udf( - ctx, file: str, well_field: str, value_field: str, udf: str, input: bool, local_file: str + ctx, + files: Tuple[str], + local_files: Tuple[str], + udf_values: Tuple[str], + well_fields: Tuple[str], + value_fields: Tuple[str], + input: bool, ): """Script to copy data from file to udf based on well position""" LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") process: Process = ctx.obj["process"] - if local_file: - file_path: str = local_file - else: - file_art: Artifact = get_artifact_by_name(process=process, name=file) - file_path: str = get_file_path(file_art) - try: - if not Path(file_path).is_file(): - raise MissingFileError(f"No such file: {file_path}") well_dict: Dict[str, Artifact] = create_well_dict(process=process, input_flag=input) - set_udfs(well_field, value_field, udf, well_dict, Path(file_path)) - click.echo("The udfs were sucessfully populated.") + udf_vf_dict: Dict[str, str] = make_udf_dict(udfs=udf_values, value_fields=value_fields) + set_udfs( + well_fields=list(well_fields), + udf_vf_dict=udf_vf_dict, + well_dict=well_dict, + file_placeholders=list(files), + local_files=list(local_files), + process=process, + ) + click.echo("The UDFs were successfully populated.") except LimsError as e: sys.exit(e.message) diff --git a/cg_lims/options.py b/cg_lims/options.py index b1e3473e..874a01dc 100644 --- a/cg_lims/options.py +++ b/cg_lims/options.py @@ -47,6 +47,12 @@ def well_field( return click.option("-wf", "--well-field", required=True, help=help) +def well_fields( + help: str = "Well field in file", +) -> click.option: + return 
click.option("-wf", "--well-fields", required=True, multiple=True, help=help) + + def value_field( help: str = "Value field in file", ) -> click.option: @@ -92,7 +98,7 @@ def file_placeholder( def file_placeholders( help: str = "File placeholder option used when multiple are possible.", ) -> click.option: - return click.option("-f", "--files", required=True, multiple=True, help=help) + return click.option("-f", "--files", required=False, multiple=True, help=help) def samples_file(help: str = "Txt file with sample ids") -> click.option: @@ -107,6 +113,10 @@ def local_file(help="local file path for debug purposes.") -> click.option: return click.option("-lf", "--local_file", required=False, help=help) +def local_files(help="local file paths for debug purposes.") -> click.option: + return click.option("-lf", "--local_files", required=False, multiple=True, help=help) + + def input( help: str = "Use this flag if you run the script from a QC step.", ) -> click.option: @@ -741,3 +751,9 @@ def round_decimals( help: str = "The number of decimals you want to round to.", ) -> click.option: return click.option("-r", "--round-decimals", required=False, help=help) + + +def value_fields( + help: str = "Value fields in file", +) -> click.option: + return click.option("-vf", "--value-fields", required=True, multiple=True, help=help) From 284390ccdab30e21010b576460eb0a4fa9b61ce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Fri, 10 Jan 2025 16:32:48 +0100 Subject: [PATCH 22/25] docstrings --- cg_lims/EPPs/files/parsers/file_to_udf.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py index 028eb87a..a7b96f10 100644 --- a/cg_lims/EPPs/files/parsers/file_to_udf.py +++ b/cg_lims/EPPs/files/parsers/file_to_udf.py @@ -17,7 +17,7 @@ def make_udf_dict(udfs: Tuple[str], value_fields: Tuple[str]) -> Dict[str, str]: - """""" + """Create dictionary containing 
UDF names and their corresponding value field names.""" if len(udfs) != len(value_fields): raise ArgumentError( f"The number of artifact-udfs to update and file value fields must be the same." @@ -29,8 +29,8 @@ def make_udf_dict(udfs: Tuple[str], value_fields: Tuple[str]) -> Dict[str, str]: def get_file_placeholder_paths(placeholder_names: List[str], process: Process) -> List[str]: - """""" - file_paths = [] + """Convert a list of file placeholder names to complete file paths.""" + file_paths: List[str] = [] for placeholder_name in placeholder_names: file_artifact: Artifact = get_artifact_by_name(process=process, name=placeholder_name) file_paths.append(get_file_path(file_artifact=file_artifact)) @@ -40,8 +40,7 @@ def get_file_placeholder_paths(placeholder_names: List[str], process: Process) - def set_udfs_from_file( well_field: str, udf_vf_dict: Dict[str, str], well_dict: dict, result_file: Path ) -> List[str]: - """Reads the csv and sets the value for each sample""" - + """Parse a CSV file and set the corresponding UDF values for each sample.""" error_msg: List[str] = [] passed_arts: int = 0 with open(result_file, newline="", encoding="latin1") as csvfile: @@ -60,7 +59,6 @@ def set_udfs_from_file( LOG.info(f"Well {well} was not found in the step. 
Skipping!") continue artifact: Artifact = well_dict[well] - value: Any = sample.get(value_field) if value is None: error_msg.append("Some samples in the file had missing values.") @@ -87,7 +85,7 @@ def set_udfs( local_files: Optional[List[str]], process: Process, ) -> None: - """""" + """Loop through each given file and parse out the given values which are then set to their corresponding UDFs.""" if local_files: files: List[str] = local_files else: @@ -134,7 +132,7 @@ def csv_well_to_udf( value_fields: Tuple[str], input: bool, ): - """Script to copy data from file to udf based on well position""" + """Script to copy data from files to UDFs based on well position.""" LOG.info(f"Running {ctx.command_path} with params: {ctx.params}") process: Process = ctx.obj["process"] From 8eabfdcb9122cdffb11b7441f1353dc6dad47e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Mon, 13 Jan 2025 09:33:30 +0100 Subject: [PATCH 23/25] some more stuff --- cg_lims/EPPs/files/parsers/file_to_udf.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py index a7b96f10..d0e8b841 100644 --- a/cg_lims/EPPs/files/parsers/file_to_udf.py +++ b/cg_lims/EPPs/files/parsers/file_to_udf.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - import csv import logging import sys @@ -60,7 +58,7 @@ def set_udfs_from_file( continue artifact: Artifact = well_dict[well] value: Any = sample.get(value_field) - if value is None: + if not value: error_msg.append("Some samples in the file had missing values.") LOG.info(f"Missing value for sample {sample} in well {well}. 
Skipping!") continue @@ -112,7 +110,7 @@ def set_udfs( if error_message: error_string: str = " ".join(list(set(error_message))) - raise MissingArtifactError(error_string) + raise MissingArtifactError(error_string + " See the log for details.") @click.command() From 936a8916b85e6c3af595bde9fa449024ffce04ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Mon, 13 Jan 2025 10:53:19 +0100 Subject: [PATCH 24/25] add new udf option to not break existing configs --- cg_lims/EPPs/files/parsers/file_to_udf.py | 6 +++--- cg_lims/options.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cg_lims/EPPs/files/parsers/file_to_udf.py b/cg_lims/EPPs/files/parsers/file_to_udf.py index d0e8b841..c925d2ec 100644 --- a/cg_lims/EPPs/files/parsers/file_to_udf.py +++ b/cg_lims/EPPs/files/parsers/file_to_udf.py @@ -116,7 +116,7 @@ def set_udfs( @click.command() @options.file_placeholders(help="File placeholder name.") @options.local_files() -@options.udf_values() +@options.udfs() @options.well_fields() @options.value_fields() @options.input() @@ -125,7 +125,7 @@ def csv_well_to_udf( ctx, files: Tuple[str], local_files: Tuple[str], - udf_values: Tuple[str], + udfs: Tuple[str], well_fields: Tuple[str], value_fields: Tuple[str], input: bool, @@ -137,7 +137,7 @@ def csv_well_to_udf( try: well_dict: Dict[str, Artifact] = create_well_dict(process=process, input_flag=input) - udf_vf_dict: Dict[str, str] = make_udf_dict(udfs=udf_values, value_fields=value_fields) + udf_vf_dict: Dict[str, str] = make_udf_dict(udfs=udfs, value_fields=value_fields) set_udfs( well_fields=list(well_fields), udf_vf_dict=udf_vf_dict, diff --git a/cg_lims/options.py b/cg_lims/options.py index 874a01dc..3c6d7319 100644 --- a/cg_lims/options.py +++ b/cg_lims/options.py @@ -29,6 +29,12 @@ def udf( return click.option("-u", "--udf", required=False, help=help) +def udfs( + help: str = "UDF names", +) -> click.option: + return click.option("-u", "--udfs", required=False, multiple=True, 
help=help) + + def buffer_udf( help: str = "UDF name", ) -> click.option: From e142fc604c0aca5420762654287b90fa135b7b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Sv=C3=A4rd?= Date: Mon, 13 Jan 2025 14:17:34 +0100 Subject: [PATCH 25/25] type hint --- cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py index a344a275..ee4c1a71 100644 --- a/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py +++ b/cg_lims/EPPs/files/parsers/quantit_excel_to_udf.py @@ -36,7 +36,7 @@ def set_udfs(udf: str, well_dict: dict, result_file: Path): artifact.put() if failed_artifacts or skipped_artifacts: - error_message = "Warning:" + error_message: str = "Warning:" if failed_artifacts: error_message += f" Skipped {failed_artifacts} artifact(s) with wrong and/or blank values for some UDFs." if skipped_artifacts: