Skip to content

Commit

Permalink
Merge pull request #226 from ncbo/csv_format_change_fix
Browse files Browse the repository at this point in the history
CSV format change fix
  • Loading branch information
alexskr authored Nov 18, 2024
2 parents db05a3b + d380a88 commit 073fae6
Show file tree
Hide file tree
Showing 4 changed files with 270 additions and 20 deletions.
24 changes: 9 additions & 15 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,18 @@ GEM
launchy (>= 2.1, < 4.0)
mail (~> 2.7)
eventmachine (1.2.7)
faraday (2.12.0)
faraday-net_http (>= 2.0, < 3.4)
json
logger
faraday-net_http (3.3.0)
net-http
ffi (1.17.0-aarch64-linux-gnu)
ffi (1.17.0-arm64-darwin)
ffi (1.17.0-x86_64-linux-gnu)
faraday (1.2.0)
multipart-post (>= 1.2, < 3)
ruby2_keywords
ffi (1.17.0)
hashie (5.0.0)
htmlentities (4.3.4)
http-accept (1.7.0)
http-cookie (1.0.7)
domain_name (~> 0.5)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
json (2.8.1)
json (2.8.2)
json_pure (2.8.1)
language_server-protocol (3.17.0.3)
launchy (3.0.1)
Expand Down Expand Up @@ -99,8 +94,7 @@ GEM
minitest (>= 2.12, < 5.0)
powerbar
multi_json (1.15.0)
net-http (0.5.0)
uri
multipart-post (2.4.1)
net-http-persistent (2.9.4)
net-imap (0.4.18)
date
Expand All @@ -126,10 +120,10 @@ GEM
mail (>= 2.0)
powerbar (2.0.1)
hashie (>= 1.1.0)
pry (0.14.2)
pry (0.15.0)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (6.0.1)
public_suffix (5.1.1)
racc (1.8.1)
rack (2.2.10)
rack-test (0.8.3)
Expand Down Expand Up @@ -167,6 +161,7 @@ GEM
rubocop-ast (1.36.1)
parser (>= 3.3.1.0)
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
rubyzip (1.3.0)
simplecov (0.22.0)
docile (~> 1.1)
Expand All @@ -186,7 +181,6 @@ GEM
timeout (0.4.2)
tzinfo (0.3.62)
unicode-display_width (2.6.0)
uri (1.0.2)
uuid (2.3.9)
macaddr (~> 1.0)

Expand Down
4 changes: 2 additions & 2 deletions lib/ontologies_linked_data/utils/ontology_csv_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def write_class(ont_class)
row[CLASS_ID] = ont_class.id

# Preferred label
row[PREF_LABEL] = ont_class.prefLabel
row[PREF_LABEL] = Array(ont_class.prefLabel).first

# Synonyms
synonyms = ont_class.synonym
Expand All @@ -50,7 +50,7 @@ def write_class(ont_class)
row[DEFINITIONS] = definitions.join('|') unless definitions.empty?

# Obsolete
row[OBSOLETE] = ont_class.obsolete
row[OBSOLETE] = Array(ont_class.obsolete).first.to_s.upcase

# CUI
cuis = ont_class.cui
Expand Down
228 changes: 228 additions & 0 deletions test/data/ontology_files/chebi_test.obo
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
format-version: 1.2
data-version: 237
date: 30:10:2024 06:16
saved-by: chebi
subsetdef: 1_STAR "Preliminary entries"
subsetdef: 2_STAR "Annotated by 3rd party"
subsetdef: 3_STAR "Manually annotated by ChEBI Team"
synonymtypedef: BRAND_NAME "BRAND NAME"
synonymtypedef: INN "INN"
synonymtypedef: IUPAC_NAME "IUPAC NAME"
default-namespace: chebi_ontology
remark: Author: ChEBI curation team
remark: ChEBI Release version 237
remark: ChEBI subsumes and replaces the Chemical Ontology first
remark: developed by Michael Ashburner & Pankaj Jaiswal.
remark: For any queries contact chebi-help@ebi.ac.uk
ontology: chebi

[Term]
id: CHEBI:137366
name: CHEBI:4042
is_obsolete: true

[Term]
id: CHEBI:137377
name: CHEBI:81850
is_obsolete: true

[Term]
id: CHEBI:143109
name: waterssdfsdfss
is_obsolete: true

[Term]
id: CHEBI:177198
name: CHEBI:50860
is_obsolete: true

[Term]
id: CHEBI:189822
name: testing532
is_obsolete: true

[Term]
id: CHEBI:24431
name: chemical entity
def: "A chemical entity is a physical entity of interest in chemistry including molecular entities, parts thereof, and chemical substances." []
subset: 3_STAR
synonym: "chemical entity" EXACT [UniProt]

[Term]
id: CHEBI:27189
name: unclassifieds
is_obsolete: true

[Term]
id: CHEBI:30430
name: indium atom
def: "A metallic element first identified and named from the brilliant indigo (Latin indicum) blue line in its flame spectrum." []
subset: 3_STAR
synonym: "49In" RELATED [IUPAC]
synonym: "In" RELATED [IUPAC]
synonym: "indio" RELATED [ChEBI]
synonym: "Indium" RELATED [ChEBI]
synonym: "indium" EXACT IUPAC_NAME [IUPAC]
synonym: "indium" RELATED [ChEBI]
xref: CAS:7440-74-6 {source="ChemIDplus"}
xref: CAS:7440-74-6 {source="NIST Chemistry WebBook"}
xref: Gmelin:16297 {source="Gmelin"}
xref: WebElements:In
is_a: CHEBI:33317 ! boron group element atom
property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/formula "In" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/In" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchikey "APFVFJFRJDLVQX-UHFFFAOYSA-N" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/mass "114.81800" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "114.90388" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/smiles "[In]" xsd:string

[Term]
id: CHEBI:33250
name: atom
alt_id: CHEBI:22671
alt_id: CHEBI:23907
def: "A chemical entity constituting the smallest component of an element having the chemical properties of the element." []
subset: 3_STAR
synonym: "atom" EXACT IUPAC_NAME [IUPAC]
synonym: "atome" RELATED [IUPAC]
synonym: "atomo" RELATED [IUPAC]
synonym: "atoms" RELATED [ChEBI]
synonym: "atomus" RELATED [ChEBI]
synonym: "element" RELATED [ChEBI]
synonym: "elements" RELATED [ChEBI]
is_a: CHEBI:24431 ! chemical entity

[Term]
id: CHEBI:33317
name: boron group element atom
subset: 3_STAR
synonym: "boron group element" RELATED [ChEBI]
synonym: "boron group elements" RELATED [ChEBI]
synonym: "Element der Borgruppe" RELATED [ChEBI]
synonym: "group 13 elements" EXACT IUPAC_NAME [IUPAC]
synonym: "group III elements" RELATED [ChEBI]
is_a: CHEBI:33560 ! p-block element atom

[Term]
id: CHEBI:33318
name: main group element atom
def: "An atom belonging to one of the main groups (found in the s- and p- blocks) of the periodic table." []
subset: 3_STAR
synonym: "Hauptgruppenelement" RELATED [ChEBI]
synonym: "Hauptgruppenelemente" RELATED [ChEBI]
synonym: "main group element" RELATED [ChEBI]
synonym: "main group elements" EXACT IUPAC_NAME [IUPAC]
is_a: CHEBI:33250 ! atom

[Term]
id: CHEBI:33560
name: p-block element atom
def: "Any main group element atom belonging to the p-block of the periodic table." []
subset: 3_STAR
synonym: "p-block element" RELATED [ChEBI]
synonym: "p-block elements" RELATED [ChEBI]
is_a: CHEBI:33318 ! main group element atom

[Term]
id: CHEBI:49631
name: gallium atom
alt_id: CHEBI:33326
alt_id: CHEBI:49630
def: "A metallic element predicted as eka-aluminium by Mendeleev in 1870 and discovered by Paul-Emile Lecoq de Boisbaudran in 1875. Named in honour of France (Latin Gallia) and perhaps also from the Latin gallus cock, a translation of Lecoq." []
subset: 3_STAR
synonym: "31Ga" RELATED [IUPAC]
synonym: "Ga" RELATED [IUPAC]
synonym: "galio" RELATED [ChEBI]
synonym: "gallium" EXACT IUPAC_NAME [IUPAC]
synonym: "gallium" RELATED [ChEBI]
xref: CAS:7440-55-3 {source="ChemIDplus"}
xref: CAS:7440-55-3 {source="NIST Chemistry WebBook"}
xref: WebElements:Ga
is_a: CHEBI:33317 ! boron group element atom
property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/formula "Ga" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/Ga" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/inchikey "GYHNNYVSQQEPJS-UHFFFAOYSA-N" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/mass "69.72300" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "68.92557" xsd:string
property_value: http://purl.obolibrary.org/obo/chebi/smiles "[Ga]" xsd:string

[Term]
id: CHEBI:64352
name: UDP-N-acetyl-D-glucosamine(2-)
is_obsolete: true

[Term]
id: CHEBI:64360
name: tocilizumab
is_obsolete: true

[Term]
id: CHEBI:64867
name: PHS C26
is_obsolete: true

[Typedef]
id: has_functional_parent
name: has functional parent
is_cyclic: false
is_transitive: false

[Typedef]
id: has_major_microspecies_at_pH_7_3
name: has major microspecies at pH 7.3
is_cyclic: true
is_transitive: false

[Typedef]
id: has_parent_hydride
name: has parent hydride
is_cyclic: false
is_transitive: false

[Typedef]
id: has_part
name: has part
xref: BFO:0000051
is_cyclic: false
is_transitive: true

[Typedef]
id: has_role
name: has role
xref: RO:0000087
is_cyclic: false
is_transitive: false

[Typedef]
id: is_conjugate_acid_of
name: is conjugate acid of
is_cyclic: true
is_transitive: false
inverse_of: is_conjugate_base_of ! is conjugate base of

[Typedef]
id: is_conjugate_base_of
name: is conjugate base of
is_cyclic: true
is_transitive: false

[Typedef]
id: is_enantiomer_of
name: is enantiomer of
is_cyclic: true
is_transitive: false

[Typedef]
id: is_substituent_group_from
name: is substituent group from
is_cyclic: false
is_transitive: false

[Typedef]
id: is_tautomer_of
name: is tautomer of
is_cyclic: true
is_transitive: true

34 changes: 31 additions & 3 deletions test/util/test_ontology_csv_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,16 @@ def self.before_suite
end

# Returns the decompressed text of the gzipped CSV located at the suite-wide
# @@csv_path.
# NOTE(review): this span looks like a diff hunk whose +/- markers were lost —
# the two GzipReader lines appear to be the previous body and the
# get_csv_string_from_path call its replacement; confirm against the committed file.
def get_csv_string
gz = Zlib::GzipReader.open(@@csv_path)
return gz.read
get_csv_string_from_path(@@csv_path)
end

# Reports whether +string+ looks like a stringified array: text that starts
# with "[" and ends with "]", whose inner content opens and closes with the
# same quote character (single or double), e.g. '["label"]'.
#
# Returns true on a match and false otherwise (including for nil).
def enclosed_in_square_brackets_with_quotes?(string)
  case string
  when /\A\[\s*(["']).*\1\s*\]\z/ then true
  else false
  end
end

# Reads a gzip-compressed CSV file and returns its decompressed contents.
#
# @param csv_path [String] path to the gzipped CSV file
# @return [String] the decompressed file contents
def get_csv_string_from_path(csv_path)
  # The block form closes the reader (and its underlying file handle) once the
  # block finishes, even if reading raises; the bare open left it dangling.
  Zlib::GzipReader.open(csv_path) { |gz| gz.read }
end

def test_csv_writer_valid
Expand Down Expand Up @@ -252,7 +260,7 @@ def test_csv_writer_content_non_obsolete
classes = CSV.parse(get_csv_string, headers:true)
classes.select do |row|
if row[LinkedData::Utils::OntologyCSVWriter::PREF_LABEL] == preferred_label
assert_equal 'false', row[LinkedData::Utils::OntologyCSVWriter::OBSOLETE]
assert_equal 'false', row[LinkedData::Utils::OntologyCSVWriter::OBSOLETE].to_s.downcase
class_exists = true
end
end
Expand Down Expand Up @@ -309,4 +317,24 @@ def test_csv_writer_content_props_other

assert class_exists, %Q<Class not found: "#{preferred_label}">
end

# Regression test for the CSV format change: parses the ChEBI test ontology,
# loads the generated CSV and checks that no cell holds a stringified array
# (a value wrapped in square brackets and quotes).
def test_for_non_array_values
  ontology_acronym = 'CHEBITEST'
  submission_id = 1
  obo_file = "./test/data/ontology_files/chebi_test.obo"

  submission_parse(ontology_acronym, "CHEBI Ontology TEST", obo_file, submission_id,
                   process_rdf: true, index_search: true, extract_metadata: false)

  query = LinkedData::Models::OntologySubmission.where(ontology: [acronym: ontology_acronym], submissionId: submission_id)
  submission = query.include(:version, :submissionId, :ontology).first
  submission.ontology.bring(:acronym)

  csv_text = get_csv_string_from_path(submission.csv_path)
  table = CSV.parse(csv_text, headers: true)
  assert_equal 20, table.count

  table.each do |csv_row|
    csv_row.each do |_header, cell|
      assert_equal false, enclosed_in_square_brackets_with_quotes?(cell), "Expected a String, but received an Array: #{cell}"
    end
  end
end

end

0 comments on commit 073fae6

Please sign in to comment.