From 58869e4e312811bcdc044fc9db5003432462f086 Mon Sep 17 00:00:00 2001 From: GemmaTuron Date: Wed, 6 Mar 2024 20:24:07 +0100 Subject: [PATCH] updating readme [skip ci] --- .gitattributes | 1 - Dockerfile | 4 ++-- README.md | 44 +++++++++++++++++++++++++++++++++-- metadata.json | 18 +++++++------- mock.txt | 3 --- model/.DS_Store | Bin 0 -> 6148 bytes model/framework/code/main.py | 18 +++++++++----- 7 files changed, 65 insertions(+), 23 deletions(-) delete mode 100644 .gitattributes delete mode 100644 mock.txt create mode 100644 model/.DS_Store diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 6293b60..0000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -mock.txt filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile index fd70463..86ead92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ -FROM bentoml/model-server:0.11.0-py37 +FROM bentoml/model-server:0.11.0-py311 MAINTAINER ersilia -RUN pip install rdkit +RUN pip install rdkit==2023.9.5 WORKDIR /repo COPY . /repo diff --git a/README.md b/README.md index 46656b8..bf5f021 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,43 @@ -# Ersilia Model In Progress +# ErG 2D Descriptors -This model is work in progress. Please edit the [metadata.json](metadata.json) file to complete the information about the model. This README file will be updated automatically based on the information contained in that folder. \ No newline at end of file +The Extended Reduced Graph (ErG) approach uses the description of pharmacophore nodes to encode molecular properties, with the goal of correctly describing pharmacophoric properties, size and shape of molecules. It was benchmarked against Daylight fingerprints and outperformed them in 10 out of 11 cases. ErG descriptors are well suited for scaffold hopping approaches. 
+ +## Identifiers + +* EOS model ID: `eos5guo` +* Slug: `erg-descs` + +## Characteristics + +* Input: `Compound` +* Input Shape: `Single` +* Task: `Representation` +* Output: `Descriptor` +* Output Type: `Integer` +* Output Shape: `List` +* Interpretation: Vector representing SMILES + +## References + +* [Publication](https://pubs.acs.org/doi/10.1021/ci050457y) +* [Source Code](https://www.rdkit.org/docs/source/rdkit.Chem.rdReducedGraphs.html) +* Ersilia contributor: [GemmaTuron](https://github.com/GemmaTuron) + +## Ersilia model URLs +* [GitHub](https://github.com/ersilia-os/eos5guo) + +## Citation + +If you use this model, please cite the [original authors](https://pubs.acs.org/doi/10.1021/ci050457y) of the model and the [Ersilia Model Hub](https://github.com/ersilia-os/ersilia/blob/master/CITATION.cff). + +## License + +This package is licensed under a GPL-3.0 license. The model contained within this package is licensed under a BSD-3.0 license. + +Notice: Ersilia grants access to these models 'as is' provided by the original authors. Please refer to the original code repository and/or publication if you use the model in your research. + +## About Us + +The [Ersilia Open Source Initiative](https://ersilia.io) is a Non Profit Organization ([1192266](https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/5170657/full-print)) with the mission to equip labs, universities and clinics in LMIC with AI/ML tools for infectious disease research. + +[Help us](https://www.ersilia.io/donate) achieve our mission! 
\ No newline at end of file diff --git a/metadata.json b/metadata.json index 828db20..79bd733 100644 --- a/metadata.json +++ b/metadata.json @@ -1,17 +1,17 @@ { "Identifier": "eos5guo", "Slug": "erg-descs", - "Status": "In progress", + "Status": "Ready", "Title": "ErG 2D Descriptors", "Description": "The Extended Reduced Graph (ErG) approach uses the description of pharmacophore nodes to encode molecular properties, with the goal of correctly describing pharmacophoric properties, size and shape of molecules. It was benchmarked against Daylight fingerprints and outperformed them in 10 out of 11 cases. ErG descriptors are well suited for scaffold hopping approaches.", - "Mode": "", - "Task": [], - "Input": [], - "Input Shape": "", - "Output": [], - "Output Type": [], - "Output Shape": "", - "Interpretation": "", + "Mode": "Pretrained", + "Task": ["Representation"], + "Input": ["Compound"], + "Input Shape": "Single", + "Output": ["Descriptor"], + "Output Type": ["Integer"], + "Output Shape": "List", + "Interpretation": "Vector representing SMILES", "Tag": [ "Descriptor", "Fingerprint" diff --git a/mock.txt b/mock.txt deleted file mode 100644 index c33f4fb..0000000 --- a/mock.txt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:73c9ad5608a69a3c694b90527604a01ccf3c82f9e27468e83a36317aaaa1ef56 -size 28 diff --git a/model/.DS_Store b/model/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..733e55a258c4a4ee02c40c718f4f95c567b7b895 GIT binary patch literal 6148 zcmeHKJ5EDE3>=dbL1|J_?iIMfDhema1waZ=0ErX{XkV3cX}$H&K*z&DT8dg+pS{nGZTqKLf6dObYz90w>2{6|evR literal 0 HcmV?d00001 diff --git a/model/framework/code/main.py b/model/framework/code/main.py index 6729254..241ed02 100644 --- a/model/framework/code/main.py +++ b/model/framework/code/main.py @@ -1,9 +1,11 @@ # imports import os import csv +import numpy as np import sys from rdkit import Chem -from rdkit.Chem.Descriptors import MolWt +from rdkit.Chem import 
rdReducedGraphs + # parse arguments input_file = sys.argv[1] @@ -13,8 +15,11 @@ root = os.path.dirname(os.path.abspath(__file__)) # my model -def my_model(smiles_list): - return [MolWt(Chem.MolFromSmiles(smi)) for smi in smiles_list] +def erg_desc(smiles_list): + mols = [Chem.MolFromSmiles(smi) for smi in smiles_list] + ergfps = [rdReducedGraphs.GetErGFingerprint(mol) for mol in mols] + array_ergfps = [np.array(fp) for fp in ergfps] + return array_ergfps # read SMILES from .csv file, assuming one column with header @@ -24,16 +29,17 @@ def my_model(smiles_list): smiles_list = [r[0] for r in reader] # run model -outputs = my_model(smiles_list) +outputs = erg_desc(smiles_list) #check input and output have the same lenght input_len = len(smiles_list) output_len = len(outputs) assert input_len == output_len + # write output in a .csv file with open(output_file, "w") as f: writer = csv.writer(f) - writer.writerow(["value"]) # header + writer.writerow(["erg-{}".format(i) for i in range(len(outputs[0]))]) # header for o in outputs: - writer.writerow([o]) + writer.writerow(o)