-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from GeneDx/devel_0.2.0.RC
Devel 0.2.0.rc
- Loading branch information
Showing
25 changed files
with
1,856 additions
and
439 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
include txt2hpo/data/*.wv.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz#egg=en_core_sci_sm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,7 @@ | |
name=__project__, | ||
packages=find_packages(), | ||
version=__version__, | ||
description='Phenotype extraction using Named Entity Recognition', | ||
description='HPO concept recognition and phenotype extraction tool', | ||
long_description=long_description, | ||
long_description_content_type='text/markdown', | ||
author='Vlad Gainullin <[email protected]>, Kevin Arvai <[email protected]>', | ||
|
@@ -27,7 +27,13 @@ | |
'pandas', | ||
'nltk', | ||
'spacy', | ||
'networkx' | ||
'scispacy', | ||
'negspacy', | ||
'networkx', | ||
'gensim', | ||
'en_core_sci_sm' | ||
|
||
] | ||
|
||
], | ||
dependency_links=['https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz#egg=en_core_sci_sm'] | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
import time | ||
|
||
from txt2hpo.build_tree import build_search_tree | ||
|
||
|
||
class BuildTreeTestCase(unittest.TestCase): | ||
def setUp(self): | ||
self.startTime = time.time() | ||
|
||
def tearDown(self): | ||
t = time.time() - self.startTime | ||
print('%s: %.3f' % (self.id(), t)) | ||
|
||
def test_build_search_tree(self): | ||
custom_synonyms = {"HP:0001263": ['DD', 'GDD']} | ||
search_tree = build_search_tree(custom_synonyms) | ||
self.assertEqual(search_tree['dd'], {1: {'dd': ['HP:0001263']}}) | ||
self.assertEqual(search_tree['gdd'], {1: {'gdd': ['HP:0001263']}}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# Phenotype-distance/co-occurrence test cases | ||
|
||
test_case0 = """[{"hpid": ["HP:0001290"], "index": [895, 904], "matched": "hypotonia"}, | ||
{"hpid": ["HP:0000218"], "index": [3184, 3195], "matched": "high palate"}]""" | ||
|
||
test_case1 = """[{"hpid": ["HP:0001290"], "index": [895, 904], "matched": "hypotonia"}, | ||
{"hpid": ["HP:0002014"], "index": [1597, 1605], "matched": "diarrhea"}, | ||
{"hpid": ["HP:0000218"], "index": [3184, 3195], "matched": "high palate"}]""" | ||
|
||
test_case2 = """[{"hpid": ["HP:0001290"], "index": [895, 904], "matched": "hypotonia"}, | ||
{"hpid": ["HP:0001290"], "index": [1095, 1104], "matched": "hypotonia"}, | ||
{"hpid": ["HP:0002014"], "index": [1597, 1605], "matched": "diarrhea"}, | ||
{"hpid": ["HP:0000218"], "index": [3184, 3195], "matched": "high palate"}]""" | ||
|
||
|
||
test_case10 = """[{"hpid": ["HP:0001290"], "index": [895, 904], "matched": "hypotonia"}, | ||
{"hpid": ["HP:0001250"], "index": [912, 920], "matched": "epilepsy"}, | ||
{"hpid": ["HP:0000717"], "index": [1143, 1149], "matched": "autism"}, | ||
{"hpid": ["HP:0000729", "HP:0001631"], | ||
"index": [1169, 1172], | ||
"matched": "ASD"}, | ||
{"hpid": ["HP:0000752"], "index": [1234, 1247], "matched": "hyperactivity"}, | ||
{"hpid": ["HP:0011856"], "index": [1418, 1422], "matched": "pica"}, | ||
{"hpid": ["HP:0000718"], "index": [1433, 1443], "matched": "aggression"}, | ||
{"hpid": ["HP:0000739"], "index": [1480, 1487], "matched": "anxiety"}, | ||
{"hpid": ["HP:0002019"], "index": [1569, 1581], "matched": "constipation"}, | ||
{"hpid": ["HP:0002014"], "index": [1597, 1605], "matched": "diarrhea"}, | ||
{"hpid": ["HP:0000486"], "index": [1701, 1711], "matched": "strabismus"}, | ||
{"hpid": ["HP:0002650"], "index": [1819, 1828], "matched": "scoliosis"}, | ||
{"hpid": ["HP:0001371"], "index": [1836, 1848], "matched": "contractures"}, | ||
{"hpid": ["HP:0000998"], "index": [1900, 1914], "matched": "hypertrichosis"}, | ||
{"hpid": ["HP:0001363"], | ||
"index": [2060, 2076], | ||
"matched": "craniosynostosis"}, | ||
{"hpid": ["HP:0001363"], | ||
"index": [2231, 2247], | ||
"matched": "craniosynostosis"}, | ||
{"hpid": ["HP:0001363"], | ||
"index": [2385, 2401], | ||
"matched": "craniosynostosis"}, | ||
{"hpid": ["HP:0000252"], "index": [2495, 2507], "matched": "microcephaly"}, | ||
{"hpid": ["HP:0025502"], "index": [2638, 2648], "matched": "overweight"}, | ||
{"hpid": ["HP:0000581"], | ||
"index": [2935, 2951], | ||
"matched": "blepharophimosis"}, | ||
{"hpid": ["HP:0000506"], "index": [2959, 2970], "matched": "telecanthus"}, | ||
{"hpid": ["HP:0008551"], "index": [3203, 3211], "matched": "microtia"}, | ||
{"hpid": ["HP:0000508"], "index": [3282, 3288], "matched": "ptosis"}, | ||
{"hpid": ["HP:0001270"], "index": [609, 620], "matched": "motor delay"}, | ||
{"hpid": ["HP:0000729"], | ||
"index": [1143, 1167], | ||
"matched": "autism spectrum disorder"}, | ||
{"hpid": ["HP:0000736"], | ||
"index": [1391, 1416], | ||
"matched": "were short attention span"}, | ||
{"hpid": ["HP:0000478"], | ||
"index": [1651, 1668], | ||
"matched": "eye abnormalities"}, | ||
{"hpid": ["HP:0000539"], | ||
"index": [1670, 1692], | ||
"matched": "refraction abnormality"}, | ||
{"hpid": ["HP:0001382", "HP:0002761"], | ||
"index": [1752, 1771], | ||
"matched": "joint hypermobility"}, | ||
{"hpid": ["HP:0001763"], "index": [1780, 1790], "matched": "pes planus"}, | ||
{"hpid": ["HP:0040083"], "index": [1799, 1810], "matched": "toe walking"}, | ||
{"hpid": ["HP:0000403"], | ||
"index": [1870, 1892], | ||
"matched": "recurrent otitis media"}, | ||
{"hpid": ["HP:0001609"], "index": [1926, 1938], "matched": "hoarse voice"}, | ||
{"hpid": ["HP:0004322"], | ||
"index": [2458, 2475], | ||
"matched": "Short stature was"}, | ||
{"hpid": ["HP:0004325"], "index": [2586, 2601], "matched": "low body weight"}, | ||
{"hpid": ["HP:0025502"], "index": [2633, 2648], "matched": "were overweight"}, | ||
{"hpid": ["HP:0000581"], | ||
"index": [2930, 2951], | ||
"matched": "were blepharophimosis"}, | ||
{"hpid": ["HP:0000426"], | ||
"index": [2978, 3000], | ||
"matched": "prominent nasal bridge"}, | ||
{"hpid": ["HP:0000455"], "index": [3008, 3023], "matched": "broad nasal tip"}, | ||
{"hpid": ["HP:0000582"], | ||
"index": [3074, 3103], | ||
"matched": "upslanting palpebral fissures"}, | ||
{"hpid": ["HP:0400000"], "index": [3123, 3132], "matched": "tall chin"}, | ||
{"hpid": ["HP:0000286"], | ||
"index": [3140, 3156], | ||
"matched": "epicanthal folds"}, | ||
{"hpid": ["HP:0000160"], "index": [3164, 3176], "matched": "narrow mouth"}, | ||
{"hpid": ["HP:0000218"], "index": [3184, 3195], "matched": "high palate"}, | ||
{"hpid": ["HP:0000358"], | ||
"index": [3233, 3257], | ||
"matched": "posteriorly rotated ears"}, | ||
{"hpid": ["HP:0000276"], "index": [3265, 3274], "matched": "long face"}]""" | ||
|
||
|
||
test_case11_text = """Collin et al. (2008) examined 6 affected individuals from 3 families with retinitis pigmentosa, | ||
all but 1 of whom displayed characteristic RP abnormalities including night blindness as the initial symptom, | ||
retinal bone-spicule pigmentation and attenuated retinal vessels, constriction of visual fields, | ||
and a nonrecordable ERG or ERG responses in a rod-cone pattern. | ||
Two unrelated patients had posterior subcapsular cataracts. | ||
The authors observed differences in the photoreceptor dystrophy between the families: | ||
in 1 patient from family A, the cones were more severely affected than the rods (cone-rod pattern) | ||
and kinetic visual fields were not constricted but showed bilateral central scotomas; | ||
fundus examination revealed central abnormalities at the level of the retinal pigment epithelium | ||
and moderate attenuation of retinal vessels. Her 60-year-old brother also had central fundus lesions, | ||
but his ERG showed neither rod nor cone activity. Family B had relatively late onset of a classic form of RP, | ||
with preservation of central vision. The proband in family C, who was the youngest patient in the study, | ||
was the only one who was legally blind, due to severely constricted visual fields" | ||
""" | ||
|
||
test_case11_out = [{"hpid": ["HP:0032118"], "index": [74, 83], "matched": "retinitis"}, | ||
{"hpid": ["HP:0000510"], "index": [74, 94], "matched": "retinitis pigmentosa"}, | ||
{"hpid": ["HP:0000662"], "index": [166, 181], "matched": "night blindness"}, | ||
{"hpid": ["HP:0000618"], "index": [172, 181], "matched": "blindness"}, | ||
{"hpid": ["HP:0001133"], "index": [272, 301], "matched": "constriction of visual fields"}, | ||
{"hpid": ["HP:0007787"], "index": [394, 425], "matched": "posterior subcapsular cataracts"}, | ||
{"hpid": ["HP:0000523"], "index": [404, 425], "matched": "subcapsular cataracts"}, | ||
{"hpid": ["HP:0000518"], "index": [416, 425], "matched": "cataracts"}, | ||
{"hpid": ["HP:0000603"], "index": [680, 695], "matched": "central scotoma"}, | ||
{"hpid": ["HP:0000575"], "index": [688, 695], "matched": "scotoma"}, | ||
|
||
] |
Oops, something went wrong.