Skip to content

Commit

Permalink
add model
Browse files Browse the repository at this point in the history
  • Loading branch information
GemmaTuron committed Mar 6, 2024
1 parent 83ce5e5 commit c4ce04a
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 21 deletions.
1 change: 0 additions & 1 deletion .gitattributes

This file was deleted.

4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM bentoml/model-server:0.11.0-py37
FROM bentoml/model-server:0.11.0-py311
MAINTAINER ersilia

RUN pip install rdkit
RUN pip install rdkit==2023.9.5

WORKDIR /repo
COPY . /repo
18 changes: 9 additions & 9 deletions metadata.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{
"Identifier": "eos5guo",
"Slug": "erg-descs",
"Status": "In progress",
"Status": "Ready",
"Title": "ErG 2D Descriptors",
"Description": "The Extended Reduced Graph (ErG) approach uses the description of pharmacophore nodes to encode molecular properties, with the goal of correctly describing pharmacophoric properties, size and shape of molecules. It was benchmarked against Daylight fingerprints and outperformed them in 10 out of 11 cases. ErG descriptors are well suited for scaffold hopping approaches.",
"Mode": "",
"Task": [],
"Input": [],
"Input Shape": "",
"Output": [],
"Output Type": [],
"Output Shape": "",
"Interpretation": "",
"Mode": "Pretrained",
"Task": ["Representation"],
"Input": ["Compound"],
"Input Shape": "Single",
"Output": ["Descriptor"],
"Output Type": ["Integer"],
"Output Shape": "List",
"Interpretation": "Vector representing SMILES",
"Tag": [
"Descriptor",
"Fingerprint"
Expand Down
3 changes: 0 additions & 3 deletions mock.txt

This file was deleted.

Binary file added model/.DS_Store
Binary file not shown.
18 changes: 12 additions & 6 deletions model/framework/code/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# imports
import os
import csv
import numpy as np
import sys
from rdkit import Chem
from rdkit.Chem.Descriptors import MolWt
from rdkit.Chem import rdReducedGraphs


# parse arguments
input_file = sys.argv[1]
Expand All @@ -13,8 +15,11 @@
root = os.path.dirname(os.path.abspath(__file__))

# my model
def my_model(smiles_list):
return [MolWt(Chem.MolFromSmiles(smi)) for smi in smiles_list]
def erg_desc(smiles_list):
    """Compute the ErG fingerprint of every SMILES string in *smiles_list*.

    Args:
        smiles_list: iterable of SMILES strings, one molecule per entry.

    Returns:
        A list with one NumPy array per input SMILES, in input order,
        each holding the result of ``rdReducedGraphs.GetErGFingerprint``.

    Raises:
        ValueError: if RDKit cannot parse one of the SMILES strings.
    """
    array_ergfps = []
    for smi in smiles_list:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None rather
            # than raising; handing that None to the fingerprint call would
            # fail with an error that does not identify the bad input.
            raise ValueError("Could not parse SMILES: {!r}".format(smi))
        array_ergfps.append(np.array(rdReducedGraphs.GetErGFingerprint(mol)))
    return array_ergfps


# read SMILES from .csv file, assuming one column with header
Expand All @@ -24,16 +29,17 @@ def my_model(smiles_list):
smiles_list = [r[0] for r in reader]

# run model
outputs = my_model(smiles_list)
outputs = erg_desc(smiles_list)

# check input and output have the same length
input_len = len(smiles_list)
output_len = len(outputs)
assert input_len == output_len


# write output in a .csv file
with open(output_file, "w") as f:
writer = csv.writer(f)
writer.writerow(["value"]) # header
writer.writerow(["erg-{}".format(i) for i in range(len(outputs[0]))]) # header
for o in outputs:
writer.writerow([o])
writer.writerow(o)

0 comments on commit c4ce04a

Please sign in to comment.