Skip to content

Commit

Permalink
Feature: Parse xml diff files into a model (#161)
Browse files Browse the repository at this point in the history
* implement parse diff xml file into an openstruct

* add diff newAnnotation and deletedAnnotation to the parsed diff object
  • Loading branch information
syphax-bouazzouni authored Oct 19, 2024
1 parent 25819bc commit 0a45655
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 36 deletions.
58 changes: 30 additions & 28 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,22 @@ GEM
bcrypt (3.1.20)
bigdecimal (3.1.8)
builder (3.3.0)
childprocess (5.1.0)
logger (~> 1.5)
coderay (1.1.3)
concurrent-ruby (1.3.3)
concurrent-ruby (1.3.4)
connection_pool (2.4.1)
crack (1.0.0)
bigdecimal
rexml
cube-ruby (0.0.3)
daemons (1.4.1)
date (3.3.4)
docile (1.4.0)
docile (1.4.1)
domain_name (0.6.20240107)
email_spec (2.2.2)
email_spec (2.3.0)
htmlentities (~> 4.3.3)
launchy (~> 2.1)
launchy (>= 2.1, < 4.0)
mail (~> 2.7)
eventmachine (1.2.7)
faraday (1.10.3)
Expand All @@ -76,32 +78,33 @@ GEM
faraday-httpclient (1.0.1)
faraday-multipart (1.0.4)
multipart-post (~> 2)
faraday-net_http (1.0.1)
faraday-net_http (1.0.2)
faraday-net_http_persistent (1.2.0)
faraday-patron (1.0.0)
faraday-rack (1.0.0)
faraday-retry (1.0.3)
ffi (1.16.3)
hashdiff (1.1.0)
hashdiff (1.1.1)
hashie (5.0.0)
htmlentities (4.3.4)
http-accept (1.7.0)
http-cookie (1.0.6)
http-cookie (1.0.7)
domain_name (~> 0.5)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
json (2.7.2)
json-ld (3.0.2)
multi_json (~> 1.12)
rdf (>= 2.2.8, < 4.0)
jwt (2.8.2)
jwt (2.9.0)
base64
language_server-protocol (3.17.0.3)
launchy (2.5.2)
launchy (3.0.1)
addressable (~> 2.8)
childprocess (~> 5.0)
libxml-ruby (2.9.0)
link_header (0.0.8)
logger (1.6.0)
logger (1.6.1)
macaddr (1.7.2)
systemu (~> 2.6.5)
mail (2.8.1)
Expand All @@ -112,7 +115,7 @@ GEM
method_source (1.1.0)
mime-types (3.5.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2024.0604)
mime-types-data (3.2024.0903)
mini_mime (1.1.5)
minitest (4.7.5)
minitest-reporters (0.14.24)
Expand All @@ -122,9 +125,9 @@ GEM
powerbar
multi_json (1.15.0)
multipart-post (2.4.1)
net-http-persistent (4.0.2)
net-http-persistent (4.0.4)
connection_pool (~> 2.2)
net-imap (0.4.14)
net-imap (0.4.16)
date
net-protocol
net-pop (0.1.2)
Expand All @@ -134,12 +137,14 @@ GEM
net-smtp (0.5.0)
net-protocol
netrc (0.11.0)
oj (3.16.4)
oj (3.16.6)
bigdecimal (>= 3.0)
ostruct (>= 0.2)
omni_logger (0.1.4)
logger
parallel (1.25.1)
parser (3.3.3.0)
ostruct (0.6.0)
parallel (1.26.3)
parser (3.3.5.0)
ast (~> 2.4.1)
racc
pony (1.13.1)
Expand All @@ -150,7 +155,7 @@ GEM
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.1.1)
racc (1.8.0)
racc (1.8.1)
rack (1.6.13)
rack-test (0.8.3)
rack (>= 1.0, < 3)
Expand All @@ -171,7 +176,7 @@ GEM
rdf-xsd (3.2.1)
rdf (~> 3.2)
rexml (~> 3.2)
redis (5.2.0)
redis (5.3.0)
redis-client (>= 0.22.0)
redis-client (0.22.2)
connection_pool
Expand All @@ -183,22 +188,20 @@ GEM
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
rexml (3.3.1)
strscan
rexml (3.3.7)
rsolr (1.1.2)
builder (>= 2.1.2)
rubocop (1.64.1)
rubocop (1.66.1)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml (>= 3.2.5, < 4.0)
rubocop-ast (>= 1.31.1, < 2.0)
regexp_parser (>= 2.4, < 3.0)
rubocop-ast (>= 1.32.2, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.3)
rubocop-ast (1.32.3)
parser (>= 3.3.1.0)
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
Expand All @@ -210,9 +213,8 @@ GEM
simplecov-cobertura (2.1.0)
rexml
simplecov (~> 0.19)
simplecov-html (0.12.3)
simplecov-html (0.13.1)
simplecov_json_formatter (0.1.4)
strscan (3.1.0)
systemu (2.6.5)
test-unit-minitest (0.9.1)
minitest (~> 4.7)
Expand All @@ -223,7 +225,7 @@ GEM
thread_safe (0.3.6)
timeout (0.4.1)
tzinfo (0.3.62)
unicode-display_width (2.5.0)
unicode-display_width (2.6.0)
uuid (2.3.9)
macaddr (~> 1.0)
webmock (3.23.1)
Expand Down
90 changes: 90 additions & 0 deletions lib/ontologies_linked_data/models/concerns/parse_diff_file.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
require 'libxml'

module LinkedData
module Concerns
module SubmissionDiffParser

# Top-level result of parsing a diff file: the summary counters together with
# the lists of changed, new and deleted class entries.
class DiffReport
  attr_accessor :summary
  attr_accessor :changed_classes
  attr_accessor :new_classes
  attr_accessor :deleted_classes

  # @param summary the summary object of the report (a DiffSummary when built by parse_diff_report)
  # @param changed_classes the entries describing changed classes
  # @param new_classes the entries describing new classes
  # @param deleted_classes the entries describing deleted classes
  def initialize(summary, changed_classes, new_classes, deleted_classes)
    self.summary = summary
    self.changed_classes = changed_classes
    self.new_classes = new_classes
    self.deleted_classes = deleted_classes
  end
end

# Counters of a diff report: how many classes were changed, added and deleted.
class DiffSummary
  attr_accessor :number_changed_classes
  attr_accessor :number_new_classes
  attr_accessor :number_deleted_classes

  # @param number_changed_classes count of changed classes
  # @param number_new_classes count of new classes
  # @param number_deleted_classes count of deleted classes
  def initialize(number_changed_classes, number_new_classes, number_deleted_classes)
    self.number_changed_classes = number_changed_classes
    self.number_new_classes = number_new_classes
    self.number_deleted_classes = number_deleted_classes
  end
end

# One class entry of a diff report: the class IRI, its labels, and the axioms
# and annotations that were added to or removed from it.
class ChangedClass
  attr_accessor :class_iri
  attr_accessor :class_labels
  attr_accessor :new_axioms
  attr_accessor :new_annotations
  attr_accessor :deleted_annotations
  attr_accessor :deleted_axioms

  # Positional order matters: the two "new" lists come first, then the two
  # "deleted" lists, each pair as axioms followed by annotations.
  #
  # @param class_iri IRI of the class
  # @param class_labels labels of the class
  # @param new_axioms axioms added
  # @param new_annotations annotations added
  # @param deleted_axioms axioms removed
  # @param deleted_annotations annotations removed
  def initialize(class_iri, class_labels, new_axioms, new_annotations, deleted_axioms, deleted_annotations)
    self.class_iri = class_iri
    self.class_labels = class_labels
    self.new_axioms = new_axioms
    self.deleted_axioms = deleted_axioms
    self.new_annotations = new_annotations
    self.deleted_annotations = deleted_annotations
  end
end

# Entry for a class listed as new in the diff file; it adds nothing to
# ChangedClass and only serves to distinguish the kind of entry.
class NewClass < ChangedClass
end

# Entry for a class listed as deleted in the diff file; it adds nothing to
# ChangedClass and only serves to distinguish the kind of entry.
class DeletedClass < ChangedClass
end

# Reads an XML diff file and converts it into a DiffReport.
#
# The summary counters come from the first diffSummary element; the class
# entries come from the changedClasses, newClasses and deletedClasses sections
# and are built by extract_changes_details.
#
# NOTE(review): a document without a diffSummary element (or without one of
# its counters) is not handled here — presumably the lookup yields nil and the
# call chain raises; confirm against the files this is fed with.
#
# @param xml_file path of the XML diff file; defaults to this object's diffFilePath
# @return [DiffReport] the parsed report
def parse_diff_report(xml_file = self.diffFilePath)
  document = LibXML::XML::Parser.file(xml_file).parse

  # Summary counters, converted to integers in constructor order.
  summary_node = document.find_first('//diffSummary')
  counters = %w[numberChangedClasses numberNewClasses numberDeletedClasses].map do |tag|
    summary_node.find_first(tag).content.to_i
  end
  diff_summary = DiffSummary.new(*counters)

  # Each section of the document maps to one model class.
  sections = [
    ['//changedClasses/changedClass', ChangedClass],
    ['//newClasses/newClass', NewClass],
    ['//deletedClasses/deletedClass', DeletedClass]
  ]
  changed_entries, new_entries, deleted_entries = sections.map do |xpath, klass|
    document.find(xpath).map { |node| extract_changes_details(klass, node) }
  end

  DiffReport.new(diff_summary, changed_entries, new_entries, deleted_entries)
end

# Builds one model object from a single class entry of the diff XML.
#
# Reads the class IRI (whitespace-stripped), the labels, and the added/removed
# axioms and annotations from the node's child elements, then instantiates
# klass with them.
#
# @param klass [Class] ChangedClass or one of its subclasses; it is called with
#   six positional arguments
# @param node XML element exposing find and find_first (a changedClass,
#   newClass or deletedClass element)
# @return a new instance of klass
def extract_changes_details(klass, node)
  class_iri = node.find_first('classIRI').content.strip
  class_labels = node.find('classLabel').map(&:content)
  new_axioms = node.find('newAxiom').map(&:content)
  new_annotations = node.find('newAnnotation').map(&:content)
  deleted_axioms = node.find('deletedAxiom').map(&:content)
  deleted_annotations = node.find('deletedAnnotation').map(&:content)

  # The argument order must mirror ChangedClass#initialize: deleted axioms come
  # before deleted annotations. Passing them the other way round stores each
  # list under the wrong attribute.
  klass.new(class_iri, class_labels, new_axioms, new_annotations, deleted_axioms, deleted_annotations)
end
end
end
end

11 changes: 3 additions & 8 deletions lib/ontologies_linked_data/models/ontology_submission.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ class OntologySubmission < LinkedData::Models::Base
include LinkedData::Concerns::OntologySubmission::Validators
include LinkedData::Concerns::OntologySubmission::UpdateCallbacks
extend LinkedData::Concerns::OntologySubmission::DefaultCallbacks

include LinkedData::Concerns::SubmissionDiffParser
include SKOS::ConceptSchemes
include SKOS::RootsFetcher


FLAT_ROOTS_LIMIT = 1000

model :ontology_submission, scheme: File.join(__dir__, '../../../config/schemes/ontology_submission.yml'),
Expand Down Expand Up @@ -125,7 +124,7 @@ class OntologySubmission < LinkedData::Models::Base
attribute :openSearchDescription, namespace: :void, type: :uri, default: -> (s) { open_search_default(s) }
attribute :source, namespace: :dct, type: :list
attribute :endpoint, namespace: :sd, type: %i[uri list],
default: ->(s) { default_sparql_endpoint(s)}
default: ->(s) { default_sparql_endpoint(s) }
attribute :includedInDataCatalog, namespace: :schema, type: %i[list uri]

# Relations
Expand Down Expand Up @@ -299,7 +298,7 @@ def self.clear_indexed_content(ontology)
begin
conn.delete_by_query("ontology_t:\"#{ontology}\"")
rescue StandardError => e
#puts e.message
# puts e.message
end
conn
end
Expand Down Expand Up @@ -479,9 +478,6 @@ def unzip_submission(logger)
zip_dst
end




def class_count(logger = nil)
logger ||= LinkedData::Parser.logger || Logger.new($stderr)
count = -1
Expand Down Expand Up @@ -539,7 +535,6 @@ def metrics_from_file(logger = nil)
metrics
end


def add_submission_status(status)
valid = status.is_a?(LinkedData::Models::SubmissionStatus)
raise ArgumentError, "The status being added is not SubmissionStatus object" unless valid
Expand Down

0 comments on commit 0a45655

Please sign in to comment.