RECETOX · acquayefrank · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/tools/ipapy2/.shed.yml b/tools/ipapy2/.shed.yml
@@ -0,0 +1,15 @@
+name: ipaPy2
+# Tool Shed account that owns the repository; must not be left empty.
+owner: recetox
+remote_repository_url: "https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2"
+homepage_url: "https://github.com/francescodc87/ipaPy2"
+categories:
+  - Metabolomics
+description: "Mass spectrometry data annotation tool."
+long_description: "New Python implementation of the Integrated Probabilistic Annotation (IPA) - A Bayesian annotation method for LC/MS data integrating biochemical relations, isotope patterns and adduct formation."
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "{{ tool_name }} tool from the ipaPy2 package"
+suite:
+  name: suite_ipapy2
+  description: tools from the ipaPy2 suite are used for annotation of mass spectrometry data
+  type: repository_suite_definition
diff --git a/tools/ipapy2/ipapy2_MS1_annotation.py b/tools/ipapy2/ipapy2_MS1_annotation.py
@@ -0,0 +1,113 @@
+import argparse
+import os
+
+import pandas as pd
+from ipaPy2 import ipa
+
+
+def main(args):
+    df = pd.read_csv(args.mapped_isotope_patterns, keep_default_na=False)
+    df = df.replace("", None)
+    all_adducts = pd.read_csv(args.all_adducts, keep_default_na=False)
+    all_adducts = all_adducts.replace("", None)
+    ncores = int(os.environ.get("GALAXY_SLOTS")) if args.ncores is None else args.ncores
+    ppmunk = args.ppmunk if args.ppmunk else args.ppm
+    ppmthr = args.ppmthr if args.ppmthr else 2 * args.ppm
+
+    annotations = ipa.MS1annotation(
+        df,
+        all_adducts,
+        ppm=args.ppm,
+        me=args.me,
+        ratiosd=args.ratiosd,
+        ppmunk=ppmunk,
+        ratiounk=args.ratiounk,
+        ppmthr=ppmthr,
+        pRTNone=args.pRTNone,
+        pRTout=args.pRTout,
+        ncores=int(ncores),
+    )
+    annotations_flat = pd.DataFrame()
+    for peak_id in annotations:
+        annotation = annotations[peak_id]
+        annotation["peak_id"] = peak_id
+        annotations_flat = pd.concat([annotations_flat, annotation])
+    annotations_file = (
+        args.MS1_annotations if args.MS1_annotations else "MS1_annotations.csv"
+    )
+    annotations_flat.to_csv(annotations_file, index=False)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mapped_isotope_patterns",
+        type=str,
+        required=True,
+        help="A csv file containing the MS1 data. Ideally obtained from map_isotope_patterns",
+    )
+    parser.add_argument(
+        "--all_adducts",
+        type=str,
+        required=True,
+        help="A csv file containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts",
+    )
+    parser.add_argument(
+        "--ppm",
+        type=float,
+        required=True,
+        help="accuracy of the MS instrument used.",
+    )
+    parser.add_argument(
+        "--me",
+        type=float,
+        default=5.48579909065e-04,
+        help="accurate mass of the electron. Default 5.48579909065e-04",
+    )
+    parser.add_argument(
+        "--ratiosd",
+        type=float,
+        default=0.9,
+        help="acceptable ratio between predicted intensity and observed intensity of isotopes.",
+    )
+    parser.add_argument(
+        "--ppmunk",
+        type=float,
+        help="pm associated to the 'unknown' annotation. If not provided equal to ppm.",
+    )
+    parser.add_argument(
+        "--ratiounk",
+        type=float,
+        default=0.5,
+        help="isotope ratio associated to the 'unknown' annotation.",
+    )
+    parser.add_argument(
+        "--ppmthr",
+        type=float,
+        help="maximum ppm possible for the annotations. if not provided equal to 2*ppm.",
+    )
+    parser.add_argument(
+        "--pRTNone",
+        type=float,
+        default=0.8,
+        help="multiplicative factor for the RT if no RTrange present in the database.",
+    )
+    parser.add_argument(
+        "--pRTout",
+        type=float,
+        default=0.4,
+        help="multiplicative factor for the RT if measured RT is outside the RTrange present in the database.",
+    )
+    parser.add_argument(
+        "--MS1_annotations",
+        type=str,
+        help="MS1 annotation file for outputting results.",
+    )
+    parser.add_argument(
+        "--ncores",
+        type=int,
+        default=None,
+        help="number of cores to use for the computation.",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/tools/ipapy2/ipapy2_MS1_annotation.xml b/tools/ipapy2/ipapy2_MS1_annotation.xml
@@ -0,0 +1,92 @@
+<tool id="ipapy2_MS1_annotation" name="IPA MS1 annotation" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## Parameters declared inside a <section> must be addressed through the
+        ## section name. Optional values are only passed when they are set; the
+        ## script supplies its own defaults (ppmunk = ppm, ppmthr = 2*ppm, ...).
+        python3 '${__tool_directory__}/ipapy2_MS1_annotation.py'
+        --mapped_isotope_patterns '${mapped_isotope_patterns}'
+        --all_adducts '${all_adducts}'
+        --ppm ${ppm}
+        --me ${optional_settings.me}
+        #if str($optional_settings.ratiosd) != ''
+            --ratiosd ${optional_settings.ratiosd}
+        #end if
+        #if $unknown.ppmunk
+            --ppmunk ${unknown.ppmunk}
+        #end if
+        #if str($unknown.ratiounk) != ''
+            --ratiounk ${unknown.ratiounk}
+        #end if
+        #if $optional_settings.ppmthr
+            --ppmthr ${optional_settings.ppmthr}
+        #end if
+        #if str($optional_settings.pRTNone) != ''
+            --pRTNone ${optional_settings.pRTNone}
+        #end if
+        #if str($optional_settings.pRTout) != ''
+            --pRTout ${optional_settings.pRTout}
+        #end if
+        --ncores \${GALAXY_SLOTS:-1}
+        --MS1_annotations '${MS1_annotations}'
+    ]]></command>
+
+    <inputs>
+        <param label="Mapped isotope patterns" name="mapped_isotope_patterns" type="data" format="csv" help="A csv file containing the MS1 data. Ideally obtained from map_isotope_patterns" />
+        <param label="all possible adducts" name="all_adducts" type="data" format="csv" help="A csv file containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts" />
+        <param label="ppm" name="ppm" type="float" help="accuracy of the MS instrument used."/>
+        <section name="unknown" title="unknown settings">
+            <param name="ppmunk" type="float" optional="true">
+                <label>ppm for unknown</label>
+                <help>ppm associated to the 'unknown' annotation. If not provided equal to ppm.</help>
+            </param>
+            <param name="ratiounk" type="float" optional="true" value="0.5">
+                <label>isotope ratio for unknown</label>
+                <help>isotope ratio associated to the 'unknown' annotation.</help>
+            </param>
+        </section>
+        <section name="optional_settings" title="optional settings">
+            <param name="me" type="float" value="5.48579909065e-04">
+                <label>mass of the electron.</label>
+                <help>accurate mass of the electron. Default 5.48579909065e-04.</help>
+            </param>
+            <param name="ratiosd" type="float" value="0.9" optional="true">
+                <label>intensity ratio</label>
+                <help>acceptable ratio between predicted intensity and observed intensity of isotopes</help>
+            </param>
+            <param name="ppmthr" type="float" optional="true">
+                <label>ppm threshold</label>
+                <help>maximum ppm possible for the annotations. if not provided equal to 2*ppm.</help>
+            </param>
+            <param name="pRTNone" type="float" optional="true" value="0.8">
+                <label>no RT factor</label>
+                <help>multiplicative factor for the RT if no RTrange present in the database.</help>
+            </param>
+            <param name="pRTout" type="float" optional="true" value="0.4">
+                <label>outside RT factor</label>
+                <help>multiplicative factor for the RT if measured RT is outside the RTrange present in the database.</help>
+            </param>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="MS1_annotations" format="csv"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="mapped_isotope_patterns" value="mapped_isotope_patterns.csv"/>
+            <param name="all_adducts" value="all_adducts.csv"/>
+            <param name="ppm" value="3"/>
+            <output name="MS1_annotations" file="MS1_annotations.csv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+    ::
+        Annotation of the dataset based on the MS1 information. Prior probabilities
+        are based on mass only, while post probabilities are based on mass, RT,
+        previous knowledge and isotope patterns.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/ipapy2/ipapy2_MS2_annotation.py b/tools/ipapy2/ipapy2_MS2_annotation.py
@@ -0,0 +1,165 @@
+import argparse
+import os
+
+import pandas as pd
+from ipaPy2 import ipa
+
+
+def main(args):
+    df = pd.read_csv(args.mapped_isotope_patterns, keep_default_na=False)
+    df = df.replace("", None)
+    dfMS2 = pd.read_csv(args.MS2_fragmentation_data, keep_default_na=False)
+    dfMS2 = dfMS2.replace('', None)
+    all_adducts = pd.read_csv(args.all_adducts, keep_default_na=False)
+    all_adducts = all_adducts.replace("", None)
+    MS2_DB = pd.read_csv(args.MS2_DB, keep_default_na=False)
+    MS2_DB = MS2_DB.replace("", None)
+
+    ncores = int(os.environ.get("GALAXY_SLOTS")) if args.ncores is None else args.ncores
+    ppmthr = args.ppmthr if args.ppmthr else 2 * args.ppm
+
+    annotations = ipa.MSMSannotation(
+        df, 
+        dfMS2, 
+        all_adducts, 
+        MS2_DB, 
+        ppm=args.ppm, 
+        ratiosd=args.ratiosd, 
+        ppmunk=args.ppmunk, 
+        ratiounk=args.ratiounk, 
+        ppmthr=ppmthr, 
+        pRTNone=args.pRTNone, 
+        pRTout=args.pRTout, 
+        mzdCS=args.mzdCS, 
+        ppmCS=args.ppmCS, 
+        CSunk=args.CSunk, 
+        evfilt=args.evfilt, 
+        ncores=ncores
+    )
+    annotations_flat = pd.DataFrame()
+    for peak_id in annotations:
+        annotation = annotations[peak_id]
+        annotation["peak_id"] = peak_id
+        annotations_flat = pd.concat([annotations_flat, annotation])
+    annotations_file = (
+        args.MS2_annotations if args.MS2_annotations else "MS2_annotations.csv"
+    )
+    annotations_flat.to_csv(annotations_file, index=False)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mapped_isotope_patterns",
+        type=str,
+        required=True,
+        help="A csv file containing the MS1 data. Ideally obtained from map_isotope_patterns",
+    )
+    parser.add_argument(
+        "--MS2_fragmentation_data",
+        type=str,
+        required=True,
+        help="A csv file containing the MS2 fragmentation data",
+    )
+    parser.add_argument(
+        "--all_adducts",
+        type=str,
+        required=True,
+        help="A csv file containing the information on all the possible adducts given the database. Ideally obtained from compute_all_adducts",
+    )
+    parser.add_argument(
+        "--MS2_DB",
+        type=str,
+        required=True,
+        help="A csv file containing the MS2 database",
+    )
+    parser.add_argument(
+        "--ppm",
+        type=float,
+        required=True,
+        help="accuracy of the MS instrument used.",
+    )
+    parser.add_argument(
+        "--me",
+        type=float,
+        default=5.48579909065e-04,
+        help="accurate mass of the electron. Default 5.48579909065e-04",
+
+    )
+    parser.add_argument(
+        "--ratiosd",
+        type=float,
+        default=0.9,
+        help="acceptable ratio between predicted intensity and observed intensity of isotopes.",
+    )
+    parser.add_argument(
+        "--ppmunk",
+        type=float,
+        help="pm associated to the 'unknown' annotation. If not provided equal to ppm.",
+    )
+    parser.add_argument(
+        "--ratiounk",
+        type=float,
+        default=0.5,
+        help="isotope ratio associated to the 'unknown' annotation.",
+    )
+    parser.add_argument(
+        "--ppmthr",
+        type=float,
+        help="maximum ppm possible for the annotations. if not provided equal to 2*ppm.",
+    )
+    parser.add_argument(
+        "--pRTNone",
+        type=float,
+        default=0.8,
+        help="multiplicative factor for the RT if no RTrange present in the database.",
+    )
+    parser.add_argument(
+        "--pRTout",
+        type=float,
+        default=0.4,
+        help="multiplicative factor for the RT if measured RT is outside the RTrange present in the database.",
+    )
+    parser.add_argument(
+        "--mzdCS",
+        type=int,
+        default=0,
+        help="""maximum mz difference allowed when computing cosine similarity
+           scores. If one wants to use this parameter instead of ppmCS, this
+           must be set to 0. Default 0.""",
+    )
+    parser.add_argument(
+        "--ppmCS",
+        type=int,
+        default=10,
+        help="""maximum ppm allowed when computing cosine similarity scores.
+           If one wants to use this parameter instead of mzdCS, this must be
+           set to 0. Default 10.""",
+    )
+    parser.add_argument(
+        "--CSunk",
+        type=float,
+        default=0.7,
+        help="""cosine similarity score associated with the 'unknown' annotation.
+            Default 0.7""",
+    )
+    parser.add_argument(
+        "--evfilt",
+        type=bool,
+        default=False,
+        help="""Default value False. If true, only spectrum acquired with the same
+            collision energy are considered.""",
+    )
+    parser.add_argument(
+        "--MS2_annotations",
+        type=str,
+        help="MS2 annotation file for outputting results.",
+    )
+    parser.add_argument(
+        "--ncores",
+        type=int,
+        default=None,
+        help="number of cores to use for the computation.",
+    )
+    args = parser.parse_args()
+    main(args)