Skip to content

Commit

Permalink
Fix: issue when generating multilingual missing labels (#164)
Browse files Browse the repository at this point in the history
* fix the issue when generating multilingual missing labels

* fixed the issues revealed by the failing unit tests

* handle the case where the ontoportal-lirmm branch uses "@none" instead of "none"
  • Loading branch information
syphax-bouazzouni authored Oct 24, 2024
1 parent 0a45655 commit 793c4fd
Show file tree
Hide file tree
Showing 7 changed files with 26,209 additions and 42 deletions.
19 changes: 10 additions & 9 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: https://github.com/ontoportal-lirmm/goo.git
revision: a95245b8c964431505ca6315907440996c59a00d
revision: f8ac7b00e8d8b46d1eea04de014175525c1cdd83
branch: development
specs:
goo (0.0.2)
Expand Down Expand Up @@ -60,7 +60,7 @@ GEM
launchy (>= 2.1, < 4.0)
mail (~> 2.7)
eventmachine (1.2.7)
faraday (1.10.3)
faraday (1.10.4)
faraday-em_http (~> 1.0)
faraday-em_synchrony (~> 1.0)
faraday-excon (~> 1.1)
Expand Down Expand Up @@ -96,7 +96,7 @@ GEM
json-ld (3.0.2)
multi_json (~> 1.12)
rdf (>= 2.2.8, < 4.0)
jwt (2.9.0)
jwt (2.9.3)
base64
language_server-protocol (3.17.0.3)
launchy (3.0.1)
Expand All @@ -113,9 +113,10 @@ GEM
net-pop
net-smtp
method_source (1.1.0)
mime-types (3.5.2)
mime-types (3.6.0)
logger
mime-types-data (~> 3.2015)
mime-types-data (3.2024.0903)
mime-types-data (3.2024.1001)
mini_mime (1.1.5)
minitest (4.7.5)
minitest-reporters (0.14.24)
Expand All @@ -127,7 +128,7 @@ GEM
multipart-post (2.4.1)
net-http-persistent (4.0.4)
connection_pool (~> 2.2)
net-imap (0.4.16)
net-imap (0.4.17)
date
net-protocol
net-pop (0.1.2)
Expand Down Expand Up @@ -188,10 +189,10 @@ GEM
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
rexml (3.3.7)
rexml (3.3.8)
rsolr (1.1.2)
builder (>= 2.1.2)
rubocop (1.66.1)
rubocop (1.67.0)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
Expand Down Expand Up @@ -228,7 +229,7 @@ GEM
unicode-display_width (2.6.0)
uuid (2.3.9)
macaddr (~> 1.0)
webmock (3.23.1)
webmock (3.24.0)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def process(logger, options = {})

def handle_missing_labels(file_path, logger)
callbacks = {
include_languages: true,
missing_labels: {
op_name: 'Missing Labels Generation',
required: true,
Expand Down Expand Up @@ -59,6 +60,11 @@ def loop_classes(logger, raw_paging, submission, callbacks)
size = 2500
count_classes = 0
acr = submission.id.to_s.split("/")[-1]

# include all languages in attributes of classes if asked for
incl_lang = callbacks.delete(:include_languages)
RequestStore.store[:requested_lang] = :ALL if incl_lang

operations = callbacks.values.map { |v| v[:op_name] }.join(", ")

time = Benchmark.realtime do
Expand All @@ -78,9 +84,7 @@ def loop_classes(logger, raw_paging, submission, callbacks)
# 1. init artifacts hash if not explicitly passed in the callback
# 2. determine if class-level iteration is required
callbacks.each { |_, callback| callback[:artifacts] ||= {};
if callback[:caller_on_each]
iterate_classes = true
end }
iterate_classes = true if callback[:caller_on_each] }

process_callbacks(logger, callbacks, :caller_on_pre) {
|callable, callback| callable.call(callback[:artifacts], logger, paging) }
Expand All @@ -95,7 +99,7 @@ def loop_classes(logger, raw_paging, submission, callbacks)
page_len = page_classes.length

# nothing retrieved even though we're expecting more records
if total_pages > 0 && page_classes.empty? && (prev_page_len == -1 || prev_page_len == size)
if total_pages.positive? && page_classes.empty? && (prev_page_len == -1 || prev_page_len == size)
j = 0
num_calls = LinkedData.settings.num_retries_4store

Expand All @@ -117,7 +121,7 @@ def loop_classes(logger, raw_paging, submission, callbacks)
end

if page_classes.empty?
if total_pages > 0
if total_pages.positive?
logger.info("The number of pages reported for #{acr} - #{total_pages} is higher than expected #{page - 1}. Completing #{operations}...")
else
logger.info("Ontology #{acr} contains #{total_pages} pages...")
Expand Down Expand Up @@ -160,6 +164,7 @@ def loop_classes(logger, raw_paging, submission, callbacks)
@submission.save
end
end
RequestStore.store[:requested_lang] = nil if incl_lang
end

def generate_missing_labels_pre(artifacts = {}, logger, paging)
Expand All @@ -181,49 +186,68 @@ def generate_missing_labels_pre_page(artifacts = {}, logger, paging, page_classe
end

# Per-class step of the missing-labels operation: appends a prefLabel triple to
# artifacts[:label_triples] when one has to be generated for `c`, and appends
# the loom / same-URI mapping triples to artifacts[:mapping_triples].
#
# NOTE(review): this listing looks like a rendered diff. The camelCase lines
# (`prefLabel`, `loomLabel`) appear to be the removed implementation and the
# snake_case lines (`pref_label`, `loom_label`) the added one, interleaved
# without +/- markers, so the text is not runnable as shown. Confirm against
# the real file.
#
# artifacts - hash holding the :label_triples and :mapping_triples collections
#             that this method appends to.
# c         - the object being processed; the body reads c.id, c.prefLabel,
#             c.label and c.synonym (presumably an ontology class - verify
#             against the caller).
# logger, paging, page_classes, page - not referenced anywhere in this body.
def generate_missing_labels_each(artifacts = {}, logger, paging, page_classes, page, c)
prefLabel = nil

if c.prefLabel.nil?
rdfs_labels = c.label
pref_label = nil
portal_lang = Goo.portal_language
pref_label_lang = c.prefLabel(include_languages: true)
# True when there is no prefLabel at all, or none keyed by the portal language,
# :none or '@none' (both spellings of "no language" are checked).
no_default_pref_label = pref_label_lang.nil? || (pref_label_lang.keys & [portal_lang, :none, '@none']).empty?

if rdfs_labels && rdfs_labels.length > 1 && c.synonym.length > 0
rdfs_labels = (Set.new(c.label) - Set.new(c.synonym)).to_a.first
if no_default_pref_label
lang_rdfs_labels = c.label(include_languages: true)

rdfs_labels = c.label if rdfs_labels.nil? || rdfs_labels.length == 0
# Set lang_rdfs_labels to { none: [] } if empty or no match for default label
if Array(lang_rdfs_labels).empty? || (lang_rdfs_labels.keys & [portal_lang, :none, '@none']).empty?
lang_rdfs_labels = { none: [] }
end

rdfs_labels = [rdfs_labels] if rdfs_labels and not (rdfs_labels.instance_of? Array)
label = nil
lang_rdfs_labels.each do |lang, rdfs_labels|
# Remove synonyms from rdfs_labels if there are multiple labels and synonyms exist
# NOTE(review): the set difference is taken on c.label, not on this
# language's rdfs_labels - confirm that c.label is the intended source here.
if rdfs_labels&.length.to_i > 1 && c.synonym.present?
rdfs_labels = (Set.new(c.label) - Set.new(c.synonym)).to_a.first || c.label
end

if rdfs_labels && rdfs_labels.length > 0
label = rdfs_labels[0]
else
label = LinkedData::Utils::Triples.last_iri_fragment c.id.to_s
# Ensure rdfs_labels is an array
rdfs_labels = Array(rdfs_labels) if rdfs_labels && !rdfs_labels.is_a?(Array)

# Select the label: either the minimal sorted label or the last fragment of the IRI
label = rdfs_labels&.min || LinkedData::Utils::Triples.last_iri_fragment(c.id.to_s)

# Set language to nil for :none and assign pref_label
lang = nil if lang.eql?(:none) || lang.to_s.eql?('@none')
pref_label = label if lang.nil? || lang.eql?(portal_lang)
pref_label ||= label

# NOTE(review): the triple is built from pref_label, not from this
# iteration's `label`. Once pref_label is set, a later iteration for another
# language would emit the earlier label under the later language tag -
# confirm whether `label` was intended here.
artifacts[:label_triples] << LinkedData::Utils::Triples.label_for_class_triple(
c.id, Goo.vocabulary(:metadata_def)[:prefLabel], pref_label, lang
)
end
artifacts[:label_triples] << LinkedData::Utils::Triples.label_for_class_triple(
c.id, Goo.vocabulary(:metadata_def)[:prefLabel], label)
prefLabel = label
elsif pref_label_lang
pref_label = c.prefLabel
# NOTE(review): in the snake_case version this else looks unreachable - a nil
# pref_label_lang already makes no_default_pref_label true, so the first
# branch is taken instead. Verify before relying on this fallback.
else
prefLabel = c.prefLabel
pref_label = LinkedData::Utils::Triples.last_iri_fragment(c.id.to_s)
end

if @submission.ontology.viewOf.nil?
loomLabel = LinkedData::Models::OntologySubmission.loom_transform_literal(prefLabel.to_s)
# Handle loom transformation if ontology is not a view
unless @submission.ontology.viewOf
loom_label = LinkedData::Models::OntologySubmission.loom_transform_literal(pref_label.to_s)

# A loom mapping triple is only added when the transformed label is longer
# than two characters.
if loomLabel.length > 2
if loom_label.length > 2
artifacts[:mapping_triples] << LinkedData::Utils::Triples.loom_mapping_triple(
c.id, Goo.vocabulary(:metadata_def)[:mappingLoom], loomLabel)
c.id, Goo.vocabulary(:metadata_def)[:mappingLoom], loom_label
)
end

# The same-URI mapping triple is added for every class of a non-view ontology,
# regardless of the loom label length.
artifacts[:mapping_triples] << LinkedData::Utils::Triples.uri_mapping_triple(
c.id, Goo.vocabulary(:metadata_def)[:mappingSameURI], c.id)
c.id, Goo.vocabulary(:metadata_def)[:mappingSameURI], c.id
)
end

end

def generate_missing_labels_post_page(artifacts = {}, logger, paging, page_classes, page)
rest_mappings = LinkedData::Mappings.migrate_rest_mappings(@submission.ontology.acronym)
artifacts[:mapping_triples].concat(rest_mappings)

if artifacts[:label_triples].length > 0
if artifacts[:label_triples].length.positive?
logger.info("Asserting #{artifacts[:label_triples].length} labels in " +
"#{@submission.id.to_ntriples}")
logger.flush
Expand All @@ -239,7 +263,7 @@ def generate_missing_labels_post_page(artifacts = {}, logger, paging, page_class
logger.flush
end

if artifacts[:mapping_triples].length > 0
if artifacts[:mapping_triples].length.positive?
logger.info("Asserting #{artifacts[:mapping_triples].length} mappings in " +
"#{@submission.id.to_ntriples}")
logger.flush
Expand All @@ -266,4 +290,3 @@ def generate_missing_labels_post(artifacts = {}, logger, pagging)

end
end

11 changes: 9 additions & 2 deletions lib/ontologies_linked_data/utils/triples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,17 @@ def self.rdf_for_custom_properties(ont_sub)
return (triples.join "\n")
end

# Builds the triple that attaches `label` to `class_id` through `property`,
# delegating to `triple` with an RDF::Literal as the object.
#
# NOTE(review): this listing looks like a rendered diff. The two `def` lines and
# the `return triple(...)` / `triple(...)` pair appear to be the removed and
# added versions of the same statements, closed by a single `end`. Confirm
# against the real file.
#
# class_id, property - passed through to `triple` unchanged.
# label              - converted with to_s, then backslashes and double quotes
#                      are escaped before it is wrapped in the literal.
# language           - optional (four-argument signature only, default nil).
#                      When it is non-empty and is not "none" / "@none" after
#                      downcasing, the literal gets that language and the
#                      RDF.langString datatype; otherwise it stays xsd:string.
def self.label_for_class_triple(class_id,property,label)
def self.label_for_class_triple(class_id, property, label, language=nil)
# Double every backslash: the four-backslash replacement string is read by gsub
# as two literal backslashes.
label = label.to_s.gsub('\\','\\\\\\\\')
# Prefix every double quote with a backslash.
label = label.gsub('"','\"')
return triple(class_id,property,RDF::Literal.new(label, :datatype => RDF::XSD.string))
# Default literal options: plain xsd:string with no language tag.
params = { datatype: RDF::XSD.string }
# nil becomes "" via to_s, so a missing language falls through to the default.
lang = language.to_s.downcase

if !lang.empty? && lang.to_sym != :none && !lang.to_s.eql?('@none')
params[:datatype] = RDF.langString
params[:language] = lang.to_sym
end
triple(class_id, property, RDF::Literal.new(label, params))
end

def self.generated_label(class_id, existing_label)
Expand Down
Loading

0 comments on commit 793c4fd

Please sign in to comment.