From 3064b3917082b93ae087a0ba18fa3b8ffdb06fbe Mon Sep 17 00:00:00 2001 From: Tanjin He Date: Thu, 11 Mar 2021 14:36:20 -0800 Subject: [PATCH 1/8] add model for transformers v4.2.2 --- .../models/matIdentification/model_config.pkl | 2 +- .../matIdentification/opt_cp/cp.ckpt.data-00000-of-00001 | 4 ++-- .../models/matIdentification/opt_cp/cp.ckpt.index | 4 ++-- .../models/matRecognition/model_config.pkl | 2 +- .../models/matRecognition/opt_cp/cp.ckpt.data-00000-of-00001 | 4 ++-- .../models/matRecognition/opt_cp/cp.ckpt.index | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/materials_entity_recognition/models/matIdentification/model_config.pkl b/materials_entity_recognition/models/matIdentification/model_config.pkl index d077dc9..0548a66 100644 --- a/materials_entity_recognition/models/matIdentification/model_config.pkl +++ b/materials_entity_recognition/models/matIdentification/model_config.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50951ce71ad22d9e7d62f280e28f29a44dfc77ec6fc758b28a17a11d2b5dd0bd +oid sha256:0061c2d35240b0fcd14280d18c0f6430249a3c0d496674c42d35b51c3dbcc379 size 1028 diff --git a/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.data-00000-of-00001 b/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.data-00000-of-00001 index c114ed2..8333a5d 100644 --- a/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.data-00000-of-00001 +++ b/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.data-00000-of-00001 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cea0748b83f64c2fd2178825429ae202bdc26723aab75c64f08255edb051d00 -size 780064378 +oid sha256:e778f9fb0e33122d34255f3262656596c102114879eeb8a3adcb4a5959ddb8ba +size 780064586 diff --git a/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.index b/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.index index a41a815..2eaf8fd 100644 --- a/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.index +++ b/materials_entity_recognition/models/matIdentification/opt_cp/cp.ckpt.index @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c17e458f2fdaeb2340bf62f350edd6e3d1d7b9ccb38d34480d8ffce98c300b0 -size 31682 +oid sha256:016b1820ce2438c6dc7f1341b64460cbe93e03be2f3977091d1d20e87547f329 +size 31670 diff --git a/materials_entity_recognition/models/matRecognition/model_config.pkl b/materials_entity_recognition/models/matRecognition/model_config.pkl index d2ebbce..2c26f8d 100644 --- a/materials_entity_recognition/models/matRecognition/model_config.pkl +++ b/materials_entity_recognition/models/matRecognition/model_config.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:55f855c686431ecda8cbe71ef8928ab716a89ee6b4d5bea22a2ce9d0044679cb +oid sha256:19d59c6f7a5442e6fefe49d4e4e512583c2d23ffd95c01cdbb7b6fb13858f8a4 size 1037 diff --git a/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.data-00000-of-00001 b/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.data-00000-of-00001 index 97a8dd7..1f5c046 100644 --- a/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.data-00000-of-00001 +++ b/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.data-00000-of-00001 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:612db849581ece93bb21186de71f269ca532fa389ac0f0ed83e06875397aaeb2 -size 784753594 +oid sha256:f6ba8a2f418dea85cfb768659e739666cf229bf6d919a7002005716676560c22 +size 784753802 diff --git a/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.index b/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.index index 16c6a0d..2237266 100644 --- a/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.index +++ b/materials_entity_recognition/models/matRecognition/opt_cp/cp.ckpt.index @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1d999466afb2d1f215695f9d60b04823cc6fa602a0b4e82802f708a0ada0a6f -size 31694 +oid sha256:ab7e0d97d9b32a243875f4dc3f8541e393215991a41787df3d3fc59f084fc628 +size 31682 From d853d06a3adfd9e98fd17f1cd0ec98552bc3ef39 Mon Sep 17 00:00:00 2001 From: Olga Kononova Date: Thu, 11 Mar 2021 21:26:07 -0800 Subject: [PATCH 2/8] Adjusted transformer version --- requirement.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirement.txt b/requirement.txt index e57a36f..b790510 100644 --- a/requirement.txt +++ b/requirement.txt @@ -1,8 +1,8 @@ tensorflow>=2.3.0 tensorflow-addons>=0.12.0 -transformers>=4.3.2 +transformers>=4.2.0 torch spacy chemdataextractor numpy -psutil \ No newline at end of file +psutil From b62f8e0fd7e7f64b30390ddeb2c8b35250d1c362 Mon Sep 17 00:00:00 2001 From: Olga Kononova Date: Thu, 11 Mar 2021 21:59:02 -0800 Subject: [PATCH 3/8] Fixed transformer version to match SpaCy --- requirement.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirement.txt b/requirement.txt index b790510..b7a859e 100644 --- a/requirement.txt +++ b/requirement.txt @@ -1,6 +1,6 @@ tensorflow>=2.3.0 tensorflow-addons>=0.12.0 -transformers>=4.2.0 +transformers==4.2.2 torch spacy chemdataextractor From f2df9f5b3687100a392553991fa7df5ad3db71f5 Mon Sep 17 00:00:00 2001 From: Olga Kononova Date: Fri, 12 Mar 2021 14:16:55 -0800 Subject: [PATCH 4/8] Adjusted tensorflow version --- requirement.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirement.txt b/requirement.txt index b7a859e..27a5299 100644 --- a/requirement.txt +++ b/requirement.txt @@ -1,4 +1,4 @@ -tensorflow>=2.3.0 +tensorflow==2.3.0 tensorflow-addons>=0.12.0 transformers==4.2.2 torch From 1556d0e0e986864b52aaa5406899a69d52a68942 Mon Sep 17 00:00:00 2001 From: Tanjin He Date: Fri, 12 Mar 2021 15:03:38 -0800 Subject: [PATCH 5/8] change tensorflow and transformers to old versions --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 17e4ae4..459828e 100644 --- a/setup.py +++ b/setup.py @@ -13,12 +13,12 @@ packages=find_packages(), include_package_data=True, install_requires=[ - 'tensorflow>=2.3.0', + 'tensorflow==2.3.0', 'tensorflow-addons>=0.12.0', 'spacy', 'chemdataextractor', 'numpy', - 'transformers>=4.3.2', + 'transformers==4.2.2', 'torch', 'psutil', ], From 14b6a8b6759679731f5e939cf685d1416030e917 Mon Sep 17 00:00:00 2001 From: Tanjin He Date: Fri, 12 Mar 2021 21:53:34 -0800 Subject: [PATCH 6/8] remove torch which is only used in training --- requirement.txt | 1 - setup.py | 1 - 2 files changed, 2 deletions(-) diff --git a/requirement.txt b/requirement.txt index 27a5299..205e6ad 100644 --- a/requirement.txt +++ b/requirement.txt @@ -1,7 +1,6 @@ tensorflow==2.3.0 tensorflow-addons>=0.12.0 transformers==4.2.2 -torch spacy chemdataextractor numpy diff --git a/setup.py b/setup.py index 459828e..64a24c4 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,6 @@ 'chemdataextractor', 'numpy', 'transformers==4.2.2', - 'torch', 'psutil', ], zip_safe=False) From ed811d1a0aea78bd2cb31eff9def476f944e21d1 Mon Sep 17 00:00:00 2001 From: Tanjin He Date: Sat, 13 Mar 2021 12:39:25 -0800 Subject: [PATCH 7/8] not to create checkpoint folder if reloading --- materials_entity_recognition/scripts/mat_models.py | 1 + materials_entity_recognition/scripts/model_framework.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/materials_entity_recognition/scripts/mat_models.py b/materials_entity_recognition/scripts/mat_models.py index c56bf0f..19d77b0 100644 --- a/materials_entity_recognition/scripts/mat_models.py +++ b/materials_entity_recognition/scripts/mat_models.py @@ -419,6 +419,7 @@ def matTP_identify_sents(self, input_sents): ) if self.prefetch_size > 0: data_X.prefetch(self.prefetch_size) + # Prediction all_y_preds = self.model.predict_label(x_batches=data_X) diff --git a/materials_entity_recognition/scripts/model_framework.py b/materials_entity_recognition/scripts/model_framework.py index cd8183a..1644832 100644 --- a/materials_entity_recognition/scripts/model_framework.py +++ b/materials_entity_recognition/scripts/model_framework.py @@ -105,11 +105,11 @@ def __init__(self, self.parameters_path = os.path.join(self.model_path, 'parameters.pkl') self.last_cp_dir = os.path.join(self.model_path, 'last_cp') - if not os.path.exists(self.last_cp_dir): + if not (to_reload_model or os.path.exists(self.last_cp_dir)): os.makedirs(self.last_cp_dir) self.last_cp_path = os.path.join(self.last_cp_dir, 'cp.ckpt') self.opt_cp_dir = os.path.join(self.model_path, 'opt_cp') - if not os.path.exists(self.opt_cp_dir): + if not (to_reload_model or os.path.exists(self.opt_cp_dir)): os.makedirs(self.opt_cp_dir) self.opt_cp_path = os.path.join(self.opt_cp_dir, 'cp.ckpt') From 529a82ac876fc883ff6109a24abb7b769aa324db Mon Sep 17 00:00:00 2001 From: olga Date: Thu, 25 Mar 2021 08:54:27 -0700 Subject: [PATCH 8/8] Adjusted import of Material Parser to use old version --- materials_entity_recognition/scripts/sent_ele_func.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/materials_entity_recognition/scripts/sent_ele_func.py b/materials_entity_recognition/scripts/sent_ele_func.py index 0dbe53c..b3ee93f 100644 --- a/materials_entity_recognition/scripts/sent_ele_func.py +++ b/materials_entity_recognition/scripts/sent_ele_func.py @@ -7,8 +7,9 @@ # constant if found_package('material_parser'): - from material_parser.material_parser import MaterialParser + from material_parser.material_parser_old import MaterialParser mp = MaterialParser(pubchem_lookup=False) + print ("Using Material Parser (old version).") allNonMetalElements = set(['C', 'H', 'O', 'N', 'Cl', 'F', 'P', 'S', 'Br', 'I', 'Se'] + ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn']) # element table by symbol of elements elementTable = {