diff --git a/lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb b/lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb
index ea692f44..975193b6 100644
--- a/lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb
+++ b/lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb
@@ -5,7 +5,7 @@ module Count
         def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr_acronyms = [])
           logger = nil unless enable_debug
           t = Time.now
-          latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
+          latest = retrieve_latest_submissions({ acronyms: arr_acronyms })
           counts = {}
           # Counting for External mappings
           t0 = Time.now
@@ -34,12 +34,12 @@ def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr
           end
           # Counting for mappings between the ontologies hosted by the BioPortal appliance
           i = 0
-          epr = Goo.sparql_query_client(:main)
+          Goo.sparql_query_client(:main)

           latest.each do |acro, sub|
-            self.handle_triple_store_downtime(logger) if Goo.backend_4s?
+            handle_triple_store_downtime(logger) if Goo.backend_4s?
             t0 = Time.now
-            s_counts = self.mapping_ontologies_count(sub, nil, reload_cache = reload_cache)
+            s_counts = mapping_ontologies_count(sub, nil, reload_cache = reload_cache)
             s_total = 0

             s_counts.each do |k, v|
@@ -63,7 +63,7 @@ def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr
         end

         def create_mapping_counts(logger, arr_acronyms = [])
-          ont_msg = arr_acronyms.empty? ? "all ontologies" : "ontologies [#{arr_acronyms.join(', ')}]"
+          ont_msg = arr_acronyms.empty? ? 'all ontologies' : "ontologies [#{arr_acronyms.join(', ')}]"

           time = Benchmark.realtime do
             create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
@@ -79,20 +79,20 @@ def create_mapping_counts(logger, arr_acronyms = [])
         end

         def create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
-          new_counts = mapping_counts(enable_debug = true, logger = logger, reload_cache = true, arr_acronyms)
+          new_counts = mapping_counts(true, logger, true, arr_acronyms)

           persistent_counts = {}
-          f = Goo::Filter.new(:pair_count) == false
-          LinkedData::Models::MappingCount.where.filter(f)
-                                          .include(:ontologies, :count)
+          LinkedData::Models::MappingCount.where(pair_count: false)
+                                          .include(:ontologies, :count, :pair_count)
                                           .include(:all)
                                           .all
                                           .each do |m|
             persistent_counts[m.ontologies.first] = m
           end

-          latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
-          delete_zombie_mapping_count(persistent_counts.values, latest.values.compact.map { |sub| sub.ontology.acronym })
+          latest = retrieve_latest_submissions
+          delete_zombie_mapping_count(persistent_counts, latest, new_counts)
+
           num_counts = new_counts.keys.length
           ctr = 0

@@ -100,47 +100,9 @@ def create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
           new_counts.each_key do |acr|
             new_count = new_counts[acr]
             ctr += 1
-
-            if persistent_counts.include?(acr)
-              inst = persistent_counts[acr]
-              if new_count.zero?
-                inst.delete if inst.persistent?
-              elsif new_count != inst.count
-                inst.bring_remaining
-                inst.count = new_count
-
-                begin
-                  if inst.valid?
-                    inst.save
-                  else
-                    logger.error("Error updating mapping count for #{acr}: #{inst.id.to_s}. #{inst.errors}")
-                    next
-                  end
-                rescue Exception => e
-                  logger.error("Exception updating mapping count for #{acr}: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
-                  next
-                end
-              end
-            else
-              m = LinkedData::Models::MappingCount.new
-              m.ontologies = [acr]
-              m.pair_count = false
-              m.count = new_count
-
-              begin
-                if m.valid?
-                  m.save
-                else
-                  logger.error("Error saving new mapping count for #{acr}. #{m.errors}")
-                  next
-                end
-              rescue Exception => e
-                logger.error("Exception saving new mapping count for #{acr}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
-                next
-              end
-            end
+            update_mapping_count(persistent_counts, new_counts, acr, acr, new_count, false)
             remaining = num_counts - ctr
-            logger.info("Total mapping count saved for #{acr}: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining..." : "All done!"))
+            logger.info("Total mapping count saved for #{acr}: #{new_count}. " << (remaining.positive? ? "#{remaining} counts remaining..." : 'All done!'))
           end
         end

@@ -148,23 +110,17 @@ def create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
         # ontologies to ALL other ontologies in the system
         def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
-          latest_submissions = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
-          all_latest_submissions = self.retrieve_latest_submissions
+          latest_submissions = retrieve_latest_submissions({ acronyms: arr_acronyms })
+          all_latest_submissions = retrieve_latest_submissions

           ont_total = latest_submissions.length
           logger.info("There is a total of #{ont_total} ontologies to process...")
           ont_ctr = 0
-          # filename = 'mapping_pairs.ttl'
-          # temp_dir = Dir.tmpdir
-          # temp_file_path = File.join(temp_dir, filename)
-          # temp_dir = '/Users/mdorf/Downloads/test/'
-          # temp_file_path = File.join(File.dirname(file_path), "test.ttl")
-          # fsave = File.open(temp_file_path, "a")
+
           latest_submissions.each do |acr, sub|
-            self.handle_triple_store_downtime(logger) if Goo.backend_4s?
             new_counts = nil
             time = Benchmark.realtime do
-              new_counts = self.mapping_ontologies_count(sub, nil, reload_cache = true)
+              new_counts = mapping_ontologies_count(sub, nil, true)
             end
             logger.info("Retrieved new mapping pair counts for #{acr} in #{time} seconds.")
             ont_ctr += 1

@@ -176,61 +132,20 @@ def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
               persistent_counts[other] = m
             end

-            delete_zombie_mapping_count(persistent_counts.values, all_latest_submissions.values.compact.map { |s| s.ontology.acronym })
+            delete_zombie_mapping_count(persistent_counts, all_latest_submissions, new_counts)
+
             num_counts = new_counts.keys.length
             logger.info("Ontology: #{acr}. #{num_counts} mapping pair counts to record...")
-            logger.info("------------------------------------------------")
+            logger.info('------------------------------------------------')

             ctr = 0

             new_counts.each_key do |other|
               new_count = new_counts[other]
               ctr += 1
-
-              if persistent_counts.include?(other)
-                inst = persistent_counts[other]
-                if new_count.zero?
-                  inst.delete
-                elsif new_count != inst.count
-                  inst.bring_remaining if inst.persistent?
-                  inst.pair_count = true
-                  inst.count = new_count
-
-                  begin
-                    if inst.valid?
-                      inst.save()
-                      # inst.save({ batch: fsave })
-                    else
-                      logger.error("Error updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{inst.errors}")
-                      next
-                    end
-                  rescue Exception => e
-                    logger.error("Exception updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
-                    next
-                  end
-                end
-              else
-                next unless new_counts.key?(other)
-
-                m = LinkedData::Models::MappingCount.new
-                m.count = new_count
-                m.ontologies = [acr, other]
-                m.pair_count = true
-                begin
-                  if m.valid?
-                    m.save()
-                    # m.save({ batch: fsave })
-                  else
-                    logger.error("Error saving new mapping count for the pair [#{acr}, #{other}]. #{m.errors}")
-                    next
-                  end
-                rescue Exception => e
-                  logger.error("Exception saving new mapping count for the pair [#{acr}, #{other}]. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
-                  next
-                end
-              end
+              update_mapping_count(persistent_counts, new_counts, acr, other, new_count, true)
               remaining = num_counts - ctr
-              logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining for #{acr}..." : "All done!"))
+              logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << (remaining.positive? ? "#{remaining} counts remaining for #{acr}..." : 'All done!'))

               wait_interval = 250
               next unless (ctr % wait_interval).zero?
@@ -240,25 +155,43 @@ def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
               sleep(sec_to_wait)
             end
             remaining_ont = ont_total - ont_ctr
-            logger.info("Completed processing pair mapping counts for #{acr}. " << ((remaining_ont.positive?) ? "#{remaining_ont} ontologies remaining..." : "All ontologies processed!"))
+            logger.info("Completed processing pair mapping counts for #{acr}. " << (remaining_ont.positive? ? "#{remaining_ont} ontologies remaining..." : 'All ontologies processed!'))
           end
-          # fsave.close
         end

         private

-        def delete_zombie_mapping_count(existent_counts, submissions_ready)
-          special_mappings = ["http://data.bioontology.org/metadata/ExternalMappings",
-                              "http://data.bioontology.org/metadata/InterportalMappings/agroportal",
-                              "http://data.bioontology.org/metadata/InterportalMappings/ncbo",
-                              "http://data.bioontology.org/metadata/InterportalMappings/sifr"]
+        def update_mapping_count(persistent_counts, new_counts, acr, other, new_count, pair_count)
+          if persistent_counts.include?(other)
+            inst = persistent_counts[other]
+            if new_count.zero?
+              inst.delete
+            elsif new_count != inst.count
+              inst.pair_count = true
+              inst.count = new_count
+              inst.save
+            end
+          else
+            return unless new_counts.key?(other)
+
+            m = LinkedData::Models::MappingCount.new
+            m.count = new_count
+            m.ontologies = if pair_count
+                             [acr, other]
+                           else
+                             [acr]
+                           end
+            m.pair_count = pair_count
+            m.save
+          end
+        end

-          existent_counts.each do |mapping|
-            next if mapping.ontologies.size == 1 && !(mapping.ontologies & special_mappings).empty?
-            next if mapping.ontologies.all? { |x| submissions_ready.include?(x) }
-            next unless mapping.persistent?
+        def delete_zombie_mapping_count(persistent_counts, all_latest_submissions, new_counts)
+          persistent_counts.each do |k, v|
+            next if all_latest_submissions.key?(k) && new_counts.key?(k)

-            mapping.delete
+            v.delete
+            persistent_counts.delete(k)
           end
         end
       end