From 58869e4e312811bcdc044fc9db5003432462f086 Mon Sep 17 00:00:00 2001
From: GemmaTuron <gemma@ersilia.io>
Date: Wed, 6 Mar 2024 20:24:07 +0100
Subject: [PATCH] updating readme [skip ci]

---
 .gitattributes               |   1 -
 Dockerfile                   |   4 ++--
 README.md                    |  44 +++++++++++++++++++++++++++++++++--
 metadata.json                |  18 +++++++-------
 mock.txt                     |   3 ---
 model/.DS_Store              | Bin 0 -> 6148 bytes
 model/framework/code/main.py |  18 +++++++++-----
 7 files changed, 65 insertions(+), 23 deletions(-)
 delete mode 100644 .gitattributes
 delete mode 100644 mock.txt
 create mode 100644 model/.DS_Store

diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 6293b60..0000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-mock.txt filter=lfs diff=lfs merge=lfs -text
diff --git a/Dockerfile b/Dockerfile
index fd70463..86ead92 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
-FROM bentoml/model-server:0.11.0-py37
+FROM bentoml/model-server:0.11.0-py311
 MAINTAINER ersilia
 
-RUN pip install rdkit
+RUN pip install rdkit==2023.9.5
 
 WORKDIR /repo
 COPY . /repo
diff --git a/README.md b/README.md
index 46656b8..bf5f021 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,43 @@
-# Ersilia Model In Progress
+# ErG 2D Descriptors
 
-This model is work in progress. Please edit the [metadata.json](metadata.json) file to complete the information about the model. This README file will be updated automatically based on the information contained in that folder.
\ No newline at end of file
+The Extended Reduced Graph (ErG) approach uses the description of pharmacophore nodes to encode molecular properties, with the goal of correctly describing pharmacophoric properties, size and shape of molecules. It was benchmarked against Daylight fingerprints and outperformed them in 10 out of 11 cases. ErG descriptors are well suited for scaffold hopping approaches.
+
+## Identifiers
+
+* EOS model ID: `eos5guo`
+* Slug: `erg-descs`
+
+## Characteristics
+
+* Input: `Compound`
+* Input Shape: `Single`
+* Task: `Representation`
+* Output: `Descriptor`
+* Output Type: `Integer`
+* Output Shape: `List`
+* Interpretation: Vector representing SMILES
+
+## References
+
+* [Publication](https://pubs.acs.org/doi/10.1021/ci050457y)
+* [Source Code](https://www.rdkit.org/docs/source/rdkit.Chem.rdReducedGraphs.html)
+* Ersilia contributor: [GemmaTuron](https://github.com/GemmaTuron)
+
+## Ersilia model URLs
+* [GitHub](https://github.com/ersilia-os/eos5guo)
+
+## Citation
+
+If you use this model, please cite the [original authors](https://pubs.acs.org/doi/10.1021/ci050457y) of the model and the [Ersilia Model Hub](https://github.com/ersilia-os/ersilia/blob/master/CITATION.cff).
+
+## License
+
+This package is licensed under a GPL-3.0 license. The model contained within this package is licensed under a BSD-3.0 license.
+
+Notice: Ersilia grants access to these models 'as is' provided by the original authors, please refer to the original code repository and/or publication if you use the model in your research.
+
+## About Us
+
+The [Ersilia Open Source Initiative](https://ersilia.io) is a Non Profit Organization ([1192266](https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/5170657/full-print)) whose mission is to equip labs, universities and clinics in LMIC with AI/ML tools for infectious disease research.
+
+[Help us](https://www.ersilia.io/donate) achieve our mission!
\ No newline at end of file
diff --git a/metadata.json b/metadata.json
index 828db20..79bd733 100644
--- a/metadata.json
+++ b/metadata.json
@@ -1,17 +1,17 @@
 {
     "Identifier": "eos5guo",
     "Slug": "erg-descs",
-    "Status": "In progress",
+    "Status": "Ready",
     "Title": "ErG 2D Descriptors",
     "Description": "The Extended Reduced Graph (ErG) approach uses the description of pharmacophore nodes to encode molecular properties, with the goal of correctly describing pharmacophoric properties, size and shape of molecules. It was benchmarked against Daylight fingerprints and outperformed them in 10 out of 11 cases. ErG descriptors are well suited for scaffold hopping approaches.",
-    "Mode": "",
-    "Task": [],
-    "Input": [],
-    "Input Shape": "",
-    "Output": [],
-    "Output Type": [],
-    "Output Shape": "",
-    "Interpretation": "",
+    "Mode": "Pretrained",
+    "Task": ["Representation"],
+    "Input": ["Compound"],
+    "Input Shape": "Single",
+    "Output": ["Descriptor"],
+    "Output Type": ["Integer"],
+    "Output Shape": "List",
+    "Interpretation": "Vector representing SMILES",
     "Tag": [
         "Descriptor",
         "Fingerprint"
diff --git a/mock.txt b/mock.txt
deleted file mode 100644
index c33f4fb..0000000
--- a/mock.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:73c9ad5608a69a3c694b90527604a01ccf3c82f9e27468e83a36317aaaa1ef56
-size 28
diff --git a/model/.DS_Store b/model/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..733e55a258c4a4ee02c40c718f4f95c567b7b895
GIT binary patch
literal 6148
zcmeHKJ5EDE3>=dbL1|J_?iIMfDhema1waZ=0ErX{XkV3c<!Bjygos{fkZ90YvS-)p
z*{3(f^D_WjzHe@U1%Mgd5eE-r^K<u+T~)@2bl&lZPwcSA%g1_DeL3OWYdn$jfVc1b
z{q}XYea@RNQ<aqhQa}nw0VyB_exZQ(UfOJ)s3--bfE4&rz`qZT?$`^5#Q1bD#0Wr~
zG9AWs%o4=r31TlC5}Bb{Qi(~m8Zj*C%(trRg+pS}VKsbMJ=tnPv3NS~Z&42GiHcG{
z3Y;r2$?d}X{~i5@`Tv}xofMD)|4IRyuGXt1U#WWQ=;gfEHu^o?YmRg`u7ko5?U)$t
hm>X}$H&K*z&DT8dg+pS{nGZTqKLf6dObYz90w>2{6|evR

literal 0
HcmV?d00001

diff --git a/model/framework/code/main.py b/model/framework/code/main.py
index 6729254..241ed02 100644
--- a/model/framework/code/main.py
+++ b/model/framework/code/main.py
@@ -1,9 +1,11 @@
 # imports
 import os
 import csv
+import numpy as np
 import sys
 from rdkit import Chem
-from rdkit.Chem.Descriptors import MolWt
+from rdkit.Chem import rdReducedGraphs
+
 
 # parse arguments
 input_file = sys.argv[1]
@@ -13,8 +15,11 @@
 root = os.path.dirname(os.path.abspath(__file__))
 
 # my model
-def my_model(smiles_list):
-    return [MolWt(Chem.MolFromSmiles(smi)) for smi in smiles_list]
+def erg_desc(smiles_list):
+    mols = [Chem.MolFromSmiles(smi) for smi in smiles_list]
+    ergfps = [rdReducedGraphs.GetErGFingerprint(mol) for mol in mols]
+    array_ergfps = [np.array(fp) for fp in ergfps]
+    return array_ergfps
 
 
 # read SMILES from .csv file, assuming one column with header
@@ -24,16 +29,17 @@ def my_model(smiles_list):
     smiles_list = [r[0] for r in reader]
 
 # run model
-outputs = my_model(smiles_list)
+outputs = erg_desc(smiles_list)
 
 #check input and output have the same lenght
 input_len = len(smiles_list)
 output_len = len(outputs)
 assert input_len == output_len
 
+
 # write output in a .csv file
 with open(output_file, "w") as f:
     writer = csv.writer(f)
-    writer.writerow(["value"])  # header
+    writer.writerow(["erg-{}".format(i) for i in range(len(outputs[0]))])  # header
     for o in outputs:
-        writer.writerow([o])
+        writer.writerow(o)