Skip to content

Commit

Permalink
completed ms annotation code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
acquayefrank committed Jan 30, 2025
1 parent 77a0c8b commit 4cfdafd
Show file tree
Hide file tree
Showing 14 changed files with 311 additions and 251 deletions.
108 changes: 62 additions & 46 deletions tools/ipapy2/ipapy2_MS1_annotation.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,71 @@
import argparse
import os

import pandas as pd
from ipaPy2 import ipa
from utils import LoadDataAction, StoreOutputAction, flattern_annotations


def main(args):
df = pd.read_csv(args.mapped_isotope_patterns, keep_default_na=False)
df = df.replace("", None)
all_adducts = pd.read_csv(args.all_adducts, keep_default_na=False)
all_adducts = all_adducts.replace("", None)
ncores = int(os.environ.get("GALAXY_SLOTS")) if args.ncores is None else args.ncores
ppmunk = args.ppmunk if args.ppmunk else args.ppm
ppmthr = args.ppmthr if args.ppmthr else 2 * args.ppm
def main(
input_dataset_database,
input_dataset_adduct,
ppm,
ratiosd,
ppmunk,
ratiounk,
ppmthr,
pRTNone,
pRTout,
output_dataset,
ncores,
):
write_func, file_path = output_dataset
ncores = ncores if ncores else 1
ppmunk = ppmunk if ppmunk else ppm
ppmthr = ppmthr if ppmthr else 2 * ppm

annotations = ipa.MS1annotation(
df,
all_adducts,
ppm=args.ppm,
me=args.me,
ratiosd=args.ratiosd,
input_dataset_database,
input_dataset_adduct,
ppm=ppm,
ratiosd=ratiosd,
ppmunk=ppmunk,
ratiounk=args.ratiounk,
ratiounk=ratiounk,
ppmthr=ppmthr,
pRTNone=args.pRTNone,
pRTout=args.pRTout,
ncores=int(ncores),
)
annotations_flat = pd.DataFrame()
for peak_id in annotations:
annotation = annotations[peak_id]
annotation["peak_id"] = peak_id
annotations_flat = pd.concat([annotations_flat, annotation])
annotations_file = (
args.MS1_annotations if args.MS1_annotations else "MS1_annotations.csv"
pRTNone=pRTNone,
pRTout=pRTout,
ncores=ncores,
)
annotations_flat.to_csv(annotations_file, index=False)
annotations_flat = flattern_annotations(annotations)
write_func(annotations_flat, file_path)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Pass the summary as ``description``: the first positional parameter of
# ArgumentParser is ``prog``, so a bare string would replace the program
# name in usage/error messages instead of being shown as help text.
parser = argparse.ArgumentParser(
    description="""
Annotation of the dataset based on the MS1 information. Prior probabilities
are based on mass only, while post probabilities are based on mass, RT,
previous knowledge and isotope patterns.
"""
)
parser.add_argument(
"--mapped_isotope_patterns",
type=str,
"--input_dataset_database",
nargs=2,
action=LoadDataAction,
required=True,
help="A csv file containing the MS1 data. Ideally obtained from map_isotope_patterns",
help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns",
)
parser.add_argument(
"--all_adducts",
type=str,
"--input_dataset_adducts",
nargs=2,
action=LoadDataAction,
required=True,
help="A csv file containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts",
help="A dataset containing information on all possible adducts.",
)
# ``--ppm`` is mandatory, so no ``default`` is declared: a default on a
# required option can never take effect (parsing fails first when the flag
# is missing) and would wrongly suggest that the flag may be omitted.
parser.add_argument(
    "--ppm",
    type=float,
    required=True,
    help="accuracy of the MS instrument used.",
)
parser.add_argument(
"--me",
type=float,
default=5.48579909065e-04,
help="accurate mass of the electron. Default 5.48579909065e-04",
)
parser.add_argument(
"--ratiosd",
type=float,
Expand Down Expand Up @@ -99,9 +101,11 @@ def main(args):
help="multiplicative factor for the RT if measured RT is outside the RTrange present in the database.",
)
parser.add_argument(
"--MS1_annotations",
type=str,
help="MS1 annotation file for outputting results.",
"--output_dataset",
nargs=2,
action=StoreOutputAction,
required=True,
help="MS1 annotated data",
)
parser.add_argument(
"--ncores",
Expand All @@ -110,4 +114,16 @@ def main(args):
help="number of cores to use for the computation.",
)
args = parser.parse_args()
main(args)
main(
args.input_dataset_database,
args.input_dataset_adducts,
args.ppm,
args.ratiosd,
args.ppmunk,
args.ratiounk,
args.ppmthr,
args.pRTNone,
args.pRTout,
args.output_dataset,
args.ncores,
)
51 changes: 13 additions & 38 deletions tools/ipapy2/ipapy2_MS1_annotation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
</requirements>

<command detect_errors="exit_code"><![CDATA[
#set $computed_ppmthr = float($ppm) * 2
python3 '${__tool_directory__}/ipapy2_MS1_annotation.py'
--mapped_isotope_patterns '${mapped_isotope_patterns}'
--all_adducts '${all_adducts}'
--input_dataset_database '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
--input_dataset_adducts '${all_adducts}' '${all_adducts.ext}'
--ppm ${ppm}
--me ${me}
--ratiosd ${ratiosd}
#if $ppmunk
--ppmunk ${ppmunk}
Expand All @@ -23,53 +23,28 @@
#if $ppmthr
--ppmthr ${ppmthr}
#else
--ppmthr 0
--ppmthr ${computed_ppmthr}
#end if
--pRTNone ${pRTNone}
--pRTout ${pRTout}
--MS1_annotations ${MS1_annotations}
--output_dataset '${MS1_annotations}' '${MS1_annotations.ext}'
--ncores \${GALAXY_SLOTS:-1}
]]></command>

<inputs>
<param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tarbular,parquet" help="A csv file containing the MS1 data. Ideally obtained from map_isotope_patterns" />
<param label="all possible adducts" name="all_adducts" type="data" format="csv,tsv,tarbular,parquet" help="A csv file containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
<param label="ppm" name="ppm" type="float" help="accuracy of the MS instrument used."/>
<param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the MS1 data. Ideally obtained from map_isotope_patterns" />
<param label="all possible adducts" name="all_adducts" type="data" format="csv,tsv,tabular,parquet" help="A dataset containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
<expand macro="ppm"/>
<section name="unknown" title="unknown settings">
<param name="ppmunk" type="float" optional="true">
<label>ppm for unknown</label>
<help>ppm associated to the 'unknown' annotation. If not provided equal to ppm.</help>
</param>
<param name="ratiounk" type="float" optional="true" value="0.5">
<label>isotope ratio for unknown</label>
<help>isotope ratio associated to the 'unknown' annotation.</help>
</param>
<expand macro="ms_unknown"/>
</section>
<section name="optional_settings" title="optional settings">
<param name="me" type="float" value="5.48579909065e-04">
<label>mass of the electron.</label>
<help>accurate mass of the electron. Default 5.48579909065e-04.</help>
</param>
<param name="ratiosd" type="float" value="0.9" optional="true">
<label>intensity ratio</label>
<help>acceptable ratio between predicted intensity and observed intensity of isotopes</help>
</param>
<param name="ppmthr" type="float" optional="true">
<label>ppm threshold</label>
<help>maximum ppm possible for the annotations. if not provided equal to 2*ppm.</help>
</param>
<param name="pRTNone" type="float" optional="true" value="0.8">
<label>no RT factor</label>
<help>multiplicative factor for the RT if no RTrange present in the database.</help>
</param>
<param name="pRTout" type="float" optional="true" value="0.4">
<label>outside RT factor</label>
<help>multiplicative factor for the RT if measured RT is outside the RTrange present in the database.</help>
</param>
<expand macro="ms_options"/>
</section>
</inputs>

<outputs>
<data label="${tool.name} on ${on_string}" name="MS1_annotations" format="csv,tsv,tarbular,parquet"/>
<data label="${tool.name} on ${on_string}" name="MS1_annotations" format_source="mapped_isotope_patterns"/>
</outputs>

<tests>
Expand All @@ -83,7 +58,7 @@

<help><![CDATA[
::
Annotation of the dataset base on the MS1 information. Prior probabilities
Annotation of the dataset based on the MS1 information. Prior probabilities
are based on mass only, while post probabilities are based on mass, RT,
previous knowledge and isotope patterns.
]]></help>
Expand Down
Loading

0 comments on commit 4cfdafd

Please sign in to comment.