From 8484cd23712dc05be42af95c50618b6280ad6f75 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 15 Apr 2013 15:23:19 -0700 Subject: [PATCH 01/75] some compatiblities fixes for 4store --- lib/sparql/client.rb | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index ca544d41..661f7750 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -243,6 +243,7 @@ def query(query, options = {}) # @see http://www.w3.org/TR/sparql11-protocol/#update-operation def update(query, options = {}) @op = :update + options[:op] = :update parse_response(response(query, options), options) self end @@ -257,9 +258,10 @@ def update(query, options = {}) # @option options [Hash] :headers # @return [String] def response(query, options = {}) + op = options[:op] || :query headers = options[:headers] || {} headers['Accept'] = options[:content_type] if options[:content_type] - request(query, headers) do |response| + request(query,op,headers) do |response| case response when Net::HTTPBadRequest # 400 Bad Request raise MalformedQuery.new(response.body) @@ -462,9 +464,9 @@ def http_klass(scheme) # @yieldparam [Net::HTTPResponse] response # @return [Net::HTTPResponse] # @see http://www.w3.org/TR/sparql11-protocol/#query-operation - def request(query, headers = {}, &block) + def request(query, headers = {}, op = :query, &block) method = (self.options[:method] || DEFAULT_METHOD).to_sym - request = send("make_#{method}_request", query, headers) + request = send("make_#{method}_request", query,op , headers) request.basic_auth(url.user, url.password) if url.user && !url.user.empty? @@ -483,9 +485,9 @@ def request(query, headers = {}, &block) # @param [Hash{String => String}] headers # @return [Net::HTTPRequest] # @see http://www.w3.org/TR/sparql11-protocol/#query-via-get - def make_get_request(query, headers = {}) + def make_get_request(query,op = :query, headers = {}) url = self.url.dup - url.query_values = (url.query_values || {}).merge(:query => query.to_s) + url.query_values = (url.query_values || {}).merge(op => query.to_s) request = Net::HTTP::Get.new(url.request_uri, self.headers.merge(headers)) request end @@ -498,12 +500,17 @@ def make_get_request(query, headers = {}) # @return [Net::HTTPRequest] # @see http://www.w3.org/TR/sparql11-protocol/#query-via-post-direct # @see http://www.w3.org/TR/sparql11-protocol/#query-via-post-urlencoded - def make_post_request(query, headers = {}) + def make_post_request(query, headers = {}, op = :query) request = Net::HTTP::Post.new(self.url.request_uri, self.headers.merge(headers)) case (self.options[:protocol] || DEFAULT_PROTOCOL).to_s when '1.1' - request['Content-Type'] = 'application/sparql-' + (@op || :query).to_s - request.body = query.to_s + if self.options['Content-Type'] == "application/x-www-form-urlencoded" + request['Content-Type'] = "application/x-www-form-urlencoded" + request.set_form_data(op => query.to_s) + else + request['Content-Type'] = 'application/sparql-' + (@op || :query).to_s + request.body = query.to_s + end when '1.0' request.set_form_data(:query => query.to_s) else From a27dd6071888c3a7526966d76aa8d7e36b527e54 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 23 Apr 2013 16:02:52 -0700 Subject: [PATCH 02/75] support multiple FROM statements in query --- lib/sparql/client/query.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 98b11b0b..a070704f 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -130,10 +130,8 @@ def construct(*patterns) # @see http://www.w3.org/TR/sparql11-query/#specifyingDataset def from(uri) options[:from] = uri - self end - - ## + # @param [Array] patterns # @return [Query] # @see http://www.w3.org/TR/sparql11-query/#GraphPattern @@ -335,7 +333,14 @@ def to_s buffer << '}' end - buffer << "FROM #{SPARQL::Client.serialize_value(options[:from])}" if options[:from] + from = options[:from] + if from + binding.pry + from = from.instance_of?(Array) ? options[:from] : [options[:from]] + options[:from].each do |from| + buffer << "FROM #{SPARQL::Client.serialize_value(from)}" + end + end unless patterns.empty? && form == :describe buffer << 'WHERE {' From 147e815e9fb2ab7913b59d2a01311c39c6917239 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 8 May 2013 10:05:06 -0700 Subject: [PATCH 03/75] union support --- lib/sparql/client/query.rb | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index a070704f..65b6d7f0 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -130,6 +130,7 @@ def construct(*patterns) # @see http://www.w3.org/TR/sparql11-query/#specifyingDataset def from(uri) options[:from] = uri + self end # @param [Array] patterns @@ -238,6 +239,17 @@ def optional(*patterns) self end + ## + # @return [Query] + # @see http://www.w3.org/TR/sparql11-query/#union + def union(*patterns_list) + options[:unions] ||= [] + patterns_list.each do |patterns| + options[:unions] << build_patterns(patterns) + end + self + end + ## # @private def build_patterns(patterns) @@ -355,6 +367,16 @@ def to_s end buffer += serialize_patterns(patterns) + if options[:unions] + include_union = nil + options[:unions].each do |union_block| + buffer << include_union if include_union + buffer << '{' + buffer += serialize_patterns(union_block) + buffer << '} ' + include_union = "UNION " + end + end if options[:optionals] options[:optionals].each do |patterns| buffer << 'OPTIONAL {' From 6b0225b491438c3feef3486df95c7ba962873cdd Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 8 May 2013 10:05:32 -0700 Subject: [PATCH 04/75] multiple aggregate support and multiple froms --- lib/sparql/client/query.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 65b6d7f0..c72fdd50 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -334,8 +334,8 @@ def to_s buffer << 'REDUCED' if options[:reduced] buffer << ((values.empty? and not options[:count]) ? '*' : values.map { |v| SPARQL::Client.serialize_value(v[1]) }.join(' ')) if options[:count] - options[:count].each do |var, count| - buffer << '( COUNT(' + (options[:distinct] ? 'DISTINCT ' : '') + + options[:count].each do |var, count, aggregate| + buffer << "( #{aggregate.to_s.upcase}(" + (options[:distinct] ? 'DISTINCT ' : '') + (var.is_a?(String) ? var : "?#{var}") + ') AS ' + (count.is_a?(String) ? count : "?#{count}") + ' )' end end @@ -347,9 +347,8 @@ def to_s from = options[:from] if from - binding.pry from = from.instance_of?(Array) ? options[:from] : [options[:from]] - options[:from].each do |from| + from.each do |from| buffer << "FROM #{SPARQL::Client.serialize_value(from)}" end end From 385444de30da8bbef03441087bd6cb8e7f2bbc14 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 22 May 2013 11:51:55 -0700 Subject: [PATCH 05/75] sparql xml --- Gemfile | 1 + lib/sparql/client.rb | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Gemfile b/Gemfile index f0013de1..e60cce50 100644 --- a/Gemfile +++ b/Gemfile @@ -3,6 +3,7 @@ source "http://rubygems.org" gemspec :name => "" gem "jruby-openssl", :platforms => :jruby +gem "nokogiri" group :debug do gem 'shotgun' diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 661f7750..dd9aa714 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -1,6 +1,7 @@ require 'net/http/persistent' # @see http://rubygems.org/gems/net-http-persistent require 'rdf' # @see http://rubygems.org/gems/rdf require 'rdf/ntriples' # @see http://rubygems.org/gems/rdf +require 'nokogiri' module SPARQL ## @@ -56,8 +57,12 @@ class ServerError < StandardError; end # @option options [Hash] :headers def initialize(url, options = {}, &block) @url, @options = RDF::URI.new(url.to_s), options.dup + #@headers = { + # 'Accept' => [RESULT_JSON, RESULT_XML, RDF::Format.content_types.keys.map(&:to_s)].join(', ') + #}.merge(@options.delete(:headers) || {}) @headers = { - 'Accept' => [RESULT_JSON, RESULT_XML, RDF::Format.content_types.keys.map(&:to_s)].join(', ') + #'Accept' => RESULT_JSON.to_s + 'Accept' => RESULT_XML.to_s }.merge(@options.delete(:headers) || {}) @http = http_klass(@url.scheme) @@ -260,8 +265,9 @@ def update(query, options = {}) def response(query, options = {}) op = options[:op] || :query headers = options[:headers] || {} + query_options = (query.is_a?(Query) && query.options[:query_options]) || nil headers['Accept'] = options[:content_type] if options[:content_type] - request(query,op,headers) do |response| + request(query,op,headers,query_options) do |response| case response when Net::HTTPBadRequest # 400 Bad Request raise MalformedQuery.new(response.body) @@ -286,6 +292,7 @@ def parse_response(response, options = {}) when RESULT_JSON self.class.parse_json_bindings(response.body, nodes) when RESULT_XML + #self.class.parse_xml_nokiri(response.body, nodes) self.class.parse_xml_bindings(response.body, nodes) else parse_rdf_serialization(response, options) @@ -464,9 +471,9 @@ def http_klass(scheme) # @yieldparam [Net::HTTPResponse] response # @return [Net::HTTPResponse] # @see http://www.w3.org/TR/sparql11-protocol/#query-operation - def request(query, headers = {}, op = :query, &block) + def request(query, headers = {}, op = :query, query_options = nil, &block) method = (self.options[:method] || DEFAULT_METHOD).to_sym - request = send("make_#{method}_request", query,op , headers) + request = send("make_#{method}_request", query,op , headers, query_options) request.basic_auth(url.user, url.password) if url.user && !url.user.empty? @@ -485,7 +492,7 @@ def request(query, headers = {}, op = :query, &block) # @param [Hash{String => String}] headers # @return [Net::HTTPRequest] # @see http://www.w3.org/TR/sparql11-protocol/#query-via-get - def make_get_request(query,op = :query, headers = {}) + def make_get_request(query,op = :query, headers = {},query_options = nil) url = self.url.dup url.query_values = (url.query_values || {}).merge(op => query.to_s) request = Net::HTTP::Get.new(url.request_uri, self.headers.merge(headers)) @@ -500,13 +507,15 @@ def make_get_request(query,op = :query, headers = {}) # @return [Net::HTTPRequest] # @see http://www.w3.org/TR/sparql11-protocol/#query-via-post-direct # @see http://www.w3.org/TR/sparql11-protocol/#query-via-post-urlencoded - def make_post_request(query, headers = {}, op = :query) + def make_post_request(query, headers = {}, op = :query, query_options = nil) request = Net::HTTP::Post.new(self.url.request_uri, self.headers.merge(headers)) case (self.options[:protocol] || DEFAULT_PROTOCOL).to_s when '1.1' if self.options['Content-Type'] == "application/x-www-form-urlencoded" request['Content-Type'] = "application/x-www-form-urlencoded" - request.set_form_data(op => query.to_s) + form = {op => query.to_s} + form = form.merge(query_options) if query_options + request.set_form_data(form) else request['Content-Type'] = 'application/sparql-' + (@op || :query).to_s request.body = query.to_s From 80e2e494ba42139eafd5e5097210c0e6c40ec77c Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 22 May 2013 12:05:56 -0700 Subject: [PATCH 06/75] nokiri out --- Gemfile | 1 - lib/sparql/client.rb | 1 - 2 files changed, 2 deletions(-) diff --git a/Gemfile b/Gemfile index e60cce50..f0013de1 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,6 @@ source "http://rubygems.org" gemspec :name => "" gem "jruby-openssl", :platforms => :jruby -gem "nokogiri" group :debug do gem 'shotgun' diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index dd9aa714..7d79a328 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -1,7 +1,6 @@ require 'net/http/persistent' # @see http://rubygems.org/gems/net-http-persistent require 'rdf' # @see http://rubygems.org/gems/rdf require 'rdf/ntriples' # @see http://rubygems.org/gems/rdf -require 'nokogiri' module SPARQL ## From 3bbf9778548edd93a7ec7f4d35065cc1db430f5b Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 29 May 2013 15:56:35 -0700 Subject: [PATCH 07/75] JSON parser patch --- lib/sparql/client.rb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 7d79a328..2ee94a55 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -60,8 +60,8 @@ def initialize(url, options = {}, &block) # 'Accept' => [RESULT_JSON, RESULT_XML, RDF::Format.content_types.keys.map(&:to_s)].join(', ') #}.merge(@options.delete(:headers) || {}) @headers = { - #'Accept' => RESULT_JSON.to_s - 'Accept' => RESULT_XML.to_s + 'Accept' => RESULT_JSON.to_s + #'Accept' => RESULT_XML.to_s }.merge(@options.delete(:headers) || {}) @http = http_klass(@url.scheme) @@ -324,13 +324,18 @@ def self.parse_json_bindings(json, nodes = {}) # @return [RDF::Value] # @see http://www.w3.org/TR/rdf-sparql-json-res/#variable-binding-results def self.parse_json_value(value, nodes = {}) + return nil if value == {} case value['type'].to_sym when :bnode nodes[id = value['value']] ||= RDF::Node.new(id) when :uri RDF::URI.new(value['value']) when :literal - RDF::Literal.new(value['value'], :language => value['xml:lang']) + if value['xml:lang'] or value['lang'] + RDF::Literal.new(value['value'], :language => value['xml:lang']) + else + RDF::Literal.new(value['value'], :datatype => value['datatype']) + end when :'typed-literal' RDF::Literal.new(value['value'], :datatype => value['datatype']) else nil From 39149817eda009006ab030c3f8d58daaaec811ea Mon Sep 17 00:00:00 2001 From: Paul R Alexander Date: Thu, 30 May 2013 15:51:22 -0700 Subject: [PATCH 08/75] Add read_timeout to options --- lib/sparql/client.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 2ee94a55..107fa3c6 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -54,6 +54,7 @@ class ServerError < StandardError; end # @option options [Symbol] :method (DEFAULT_METHOD) # @option options [Number] :protocol (DEFAULT_PROTOCOL) # @option options [Hash] :headers + # @option options [Hash] :read_timeout def initialize(url, options = {}, &block) @url, @options = RDF::URI.new(url.to_s), options.dup #@headers = { @@ -463,6 +464,7 @@ def http_klass(scheme) end klass = Net::HTTP::Persistent.new(self.class.to_s, proxy_url) klass.keep_alive = 120 # increase to 2 minutes + klass.read_timeout = @options[:read_timeout] || 60 klass end From d05e9188d614831d15f2d31111f98fa5768489e0 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 12 Jun 2013 15:58:17 -0700 Subject: [PATCH 09/75] json required always --- lib/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 107fa3c6..a486f2ff 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -1,6 +1,7 @@ require 'net/http/persistent' # @see http://rubygems.org/gems/net-http-persistent require 'rdf' # @see http://rubygems.org/gems/rdf require 'rdf/ntriples' # @see http://rubygems.org/gems/rdf +require 'json' module SPARQL ## @@ -304,7 +305,6 @@ def parse_response(response, options = {}) # @return [] # @see http://www.w3.org/TR/rdf-sparql-json-res/#results def self.parse_json_bindings(json, nodes = {}) - require 'json' unless defined?(::JSON) json = JSON.parse(json.to_s) unless json.is_a?(Hash) case when json.has_key?('boolean') From e8277862d05956d6946cb0b74c55b3964bbabcca Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 14 Jun 2013 15:50:09 -0700 Subject: [PATCH 10/75] union with bind as blocks --- lib/sparql/client/query.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index c72fdd50..b46565ab 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -250,6 +250,14 @@ def union(*patterns_list) self end + def union_with_bind_as(*pattern_list) + options[:unions_with_bind] ||= [] + pattern_list.each do |patterns,bind_value,bind_var| + options[:unions_with_bind] << [build_patterns(patterns), bind_value,bind_var] + end + self + end + ## # @private def build_patterns(patterns) @@ -376,6 +384,17 @@ def to_s include_union = "UNION " end end + if options[:unions_with_bind] + include_union = nil + options[:unions_with_bind].each do |union_block, value_bind, var_bind| + buffer << include_union if include_union + buffer << '{' + buffer += serialize_patterns(union_block) + buffer << "BIND (\"#{value_bind}\" as ?#{var_bind.to_s})" + buffer << '}' + include_union = "UNION " + end + end if options[:optionals] options[:optionals].each do |patterns| buffer << 'OPTIONAL {' From 901e0f760706a509e19f3914d50a2aa882636158 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 2 Jul 2013 17:37:14 -0700 Subject: [PATCH 11/75] force UTF8 encoding when parsing json resultsets --- lib/sparql/client.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index a486f2ff..82f18a75 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -305,6 +305,7 @@ def parse_response(response, options = {}) # @return [] # @see http://www.w3.org/TR/rdf-sparql-json-res/#results def self.parse_json_bindings(json, nodes = {}) + json.force_encoding(::Encoding::UTF_8) if json.respond_to?(:force_encoding) json = JSON.parse(json.to_s) unless json.is_a?(Hash) case when json.has_key?('boolean') From edd369b06c36afbb2605c7758b8e5c949e554de9 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 31 Jul 2013 18:01:48 -0700 Subject: [PATCH 12/75] goo query traces in thread current --- lib/sparql/client.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 82f18a75..8a4b51d9 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -234,6 +234,18 @@ def nodes # @return [Array] # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) + #TODO less intrusive ? + if Thread.current[:ncbo_debug] + @op = :query + qstart = Time.now + r = response(query, options) + query_time = Time.now - qstart + pstart = Time.now + parsed = parse_response(r, options) + parse_time = Time.now - pstart + (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] + return parsed + end @op = :query parse_response(response(query, options), options) end From 797d83363719b771e7b02d3bf0a656ab4343ae3b Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 6 Sep 2013 16:34:34 -0700 Subject: [PATCH 13/75] ignore vim swp --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index aeaa504b..825a55b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ *.lock pkg tmp + +*.swp From b5e2d4a6746dd74a1fcbbafa91ba13c38f99163d Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 6 Sep 2013 16:35:16 -0700 Subject: [PATCH 14/75] graph based cache key for queries --- lib/sparql/client/query.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index b46565ab..bc663b6a 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -1,3 +1,5 @@ +require 'digest/md5' + module SPARQL; class Client ## # A SPARQL query builder. @@ -258,6 +260,17 @@ def union_with_bind_as(*pattern_list) self end + def cache_key + return nil if options[:from].nil? || options[:from].empty? + from = options[:from] + from = [from] unless from.instance_of?(Array) + from = from.map { |x| x.to_s }.uniq.sort + sorted_graphs = from.join ":" + digest = Digest::MD5.hexdigest(self.to_s) + from = from.map { |x| "sparql:graph:#{x}" } + return { graphs: from, query: "sparql:#{sorted_graphs}:#{digest}" } + end + ## # @private def build_patterns(patterns) From 6c523bb223aa337457d4e7ac2296018c8f512b23 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 6 Sep 2013 16:36:17 -0700 Subject: [PATCH 15/75] cache implementation using redis at the sparql client level --- lib/sparql/client.rb | 74 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 8a4b51d9..6b935c03 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -28,6 +28,9 @@ class ServerError < StandardError; end ACCEPT_XML = {'Accept' => RESULT_XML}.freeze ACCEPT_BRTR = {'Accept' => RESULT_BRTR}.freeze + SPARQL_CACHE_QUERIES = "sparql:queries" + SPARQL_CACHE_GRAPHS = "sparql:graphs" + DEFAULT_PROTOCOL = 1.0 DEFAULT_METHOD = :post @@ -57,6 +60,10 @@ class ServerError < StandardError; end # @option options [Hash] :headers # @option options [Hash] :read_timeout def initialize(url, options = {}, &block) + @redis_cache = nil + if options[:redis_cache] + @redis_cache = options[:redis_cache] + end @url, @options = RDF::URI.new(url.to_s), options.dup #@headers = { # 'Accept' => [RESULT_JSON, RESULT_XML, RDF::Format.content_types.keys.map(&:to_s)].join(', ') @@ -235,6 +242,13 @@ def nodes # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) #TODO less intrusive ? + if @redis_cache && query.instance_of?(SPARQL::Client::Query) + cache_response = @redis_cache.get(query.cache_key) + if cache_response + return Marshal.load(cache_response) + end + options[:cache_key] = query.cache_key + end if Thread.current[:ncbo_debug] @op = :query qstart = Time.now @@ -262,6 +276,9 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update + if @redis_cache + query_delete_cache(query) + end parse_response(response(query, options), options) self end @@ -294,6 +311,51 @@ def response(query, options = {}) end end + def query_delete_cache(update) + if update.options[:graph].nil? + raise Exception, "Unsuported cacheable query" + end + graph = "sparql:graph:#{update.options[:graph].to_s}" + + if @redis_cache.exists(graph) + begin + #invalidate all the entries + #better rename+short expire than delete + query_entries = @redis_cache.smembers(graph) + @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) + query_entries.each do |e| + if @redis_cache.exists(e) + @redis_cache.rename e, "tmp:#{e}" + @redis_cache.expire("tmp:#{e}",2) + end + end + if @redis_cache.exists(graph) + @redis_cache.rename graph, "tmp:#{graph}" + @redis_cache.expire("tmp:#{graph}",2) + end + rescue => exception + puts "warning: error in cache invalidation `#{exception}`" + puts exception.backtrace + end + end + end + + def query_put_cache(keys,entry) + expiration = 86400 #1 day + if defined?(SPARQL_CACHE_EXPIRATION_TIME) + expiration = SPARQL_CACHE_EXPIRATION_TIME + end + @redis_cache.multi do + keys[:graphs].each do |g| + @redis_cache.sadd(g,keys[:query]) + @redis_cache.sadd(SPARQL_CACHE_GRAPHS,g) + end + @redis_cache.set(keys[:query],Marshal.dump(entry)) + @redis_cache.sadd(SPARQL_CACHE_QUERIES,keys[:query]) + @redis_cache.expire(keys[:query],expiration) + end + end + ## # @param [Net::HTTPSuccess] response # @param [Hash{Symbol => Object}] options @@ -303,7 +365,11 @@ def parse_response(response, options = {}) when RESULT_BOOL # Sesame-specific response.body == 'true' when RESULT_JSON - self.class.parse_json_bindings(response.body, nodes) + result_data = self.class.parse_json_bindings(response.body, nodes) + if options[:cache_key] + query_put_cache(options[:cache_key],result_data) + end + return result_data when RESULT_XML #self.class.parse_xml_nokiri(response.body, nodes) self.class.parse_xml_bindings(response.body, nodes) @@ -317,7 +383,7 @@ def parse_response(response, options = {}) # @return [] # @see http://www.w3.org/TR/rdf-sparql-json-res/#results def self.parse_json_bindings(json, nodes = {}) - json.force_encoding(::Encoding::UTF_8) if json.respond_to?(:force_encoding) + json = json.force_encoding(::Encoding::UTF_8) if json.respond_to?(:force_encoding) json = JSON.parse(json.to_s) unless json.is_a?(Hash) case when json.has_key?('boolean') @@ -457,6 +523,10 @@ def inspect sprintf("#<%s:%#0x(%s)>", self.class.name, __id__, url.to_s) end + def redis_cache=(redis_cache) + @redis_cache = redis_cache + end + protected ## From 113ec041cce43e0ba07f8de93d23d66dc9b7e98c Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 6 Sep 2013 17:50:22 -0700 Subject: [PATCH 16/75] safe graph invalidation --- lib/sparql/client.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 6b935c03..c8334827 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -243,7 +243,7 @@ def nodes def query(query, options = {}) #TODO less intrusive ? if @redis_cache && query.instance_of?(SPARQL::Client::Query) - cache_response = @redis_cache.get(query.cache_key) + cache_response = @redis_cache.get(query.cache_key[:query]) if cache_response return Marshal.load(cache_response) end @@ -315,8 +315,12 @@ def query_delete_cache(update) if update.options[:graph].nil? raise Exception, "Unsuported cacheable query" end - graph = "sparql:graph:#{update.options[:graph].to_s}" + cache_invalidate_graph(update.options[:graph].to_s) + end + def cache_invalidate_graph(graph) + return if @redis_cache.nil? + graph = "sparql:graph:#{graph.to_s}" if @redis_cache.exists(graph) begin #invalidate all the entries From e2ad816206f6efc4fac0873e885c24ed32c31b72 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Sun, 8 Sep 2013 19:24:08 -0700 Subject: [PATCH 17/75] support multi-graph cache invalidation - for nquads uploads --- lib/sparql/client.rb | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index c8334827..3772a056 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -318,28 +318,31 @@ def query_delete_cache(update) cache_invalidate_graph(update.options[:graph].to_s) end - def cache_invalidate_graph(graph) + def cache_invalidate_graph(graphs) return if @redis_cache.nil? - graph = "sparql:graph:#{graph.to_s}" - if @redis_cache.exists(graph) - begin - #invalidate all the entries - #better rename+short expire than delete - query_entries = @redis_cache.smembers(graph) - @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) - query_entries.each do |e| - if @redis_cache.exists(e) - @redis_cache.rename e, "tmp:#{e}" - @redis_cache.expire("tmp:#{e}",2) + graphs = [graphs] unless graph.instance_of?(Array) + graphs.each do |graph| + graph = "sparql:graph:#{graph.to_s}" + if @redis_cache.exists(graph) + begin + #invalidate all the entries + #better rename+short expire than delete + query_entries = @redis_cache.smembers(graph) + @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) + query_entries.each do |e| + if @redis_cache.exists(e) + @redis_cache.rename e, "tmp:#{e}" + @redis_cache.expire("tmp:#{e}",2) + end end + if @redis_cache.exists(graph) + @redis_cache.rename graph, "tmp:#{graph}" + @redis_cache.expire("tmp:#{graph}",2) + end + rescue => exception + puts "warning: error in cache invalidation `#{exception}`" + puts exception.backtrace end - if @redis_cache.exists(graph) - @redis_cache.rename graph, "tmp:#{graph}" - @redis_cache.expire("tmp:#{graph}",2) - end - rescue => exception - puts "warning: error in cache invalidation `#{exception}`" - puts exception.backtrace end end end From e5f90ba90dae53f0fb8f4cd05ad06b10fb46c3fe Mon Sep 17 00:00:00 2001 From: msalvadores Date: Sun, 8 Sep 2013 20:43:55 -0700 Subject: [PATCH 18/75] resolved conflicts --- lib/sparql/client.rb | 128 +++++++++++++++++++++++++++++++------ lib/sparql/client/query.rb | 5 +- 2 files changed, 113 insertions(+), 20 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 3772a056..02af13b8 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -10,6 +10,7 @@ module SPARQL # @see http://www.w3.org/TR/sparql11-query/ # @see http://www.w3.org/TR/sparql11-protocol/ # @see http://www.w3.org/TR/sparql11-results-json/ + # @see http://www.w3.org/TR/sparql11-results-csv-tsv/ class Client autoload :Query, 'sparql/client/query' autoload :Repository, 'sparql/client/repository' @@ -22,10 +23,14 @@ class ServerError < StandardError; end RESULT_JSON = 'application/sparql-results+json'.freeze RESULT_XML = 'application/sparql-results+xml'.freeze + RESULT_CSV = 'text/csv'.freeze + RESULT_TSV = 'text/tab-separated-values'.freeze RESULT_BOOL = 'text/boolean'.freeze # Sesame-specific RESULT_BRTR = 'application/x-binary-rdf-results-table'.freeze # Sesame-specific ACCEPT_JSON = {'Accept' => RESULT_JSON}.freeze ACCEPT_XML = {'Accept' => RESULT_XML}.freeze + ACCEPT_CSV = {'Accept' => RESULT_CSV}.freeze + ACCEPT_TSV = {'Accept' => RESULT_TSV}.freeze ACCEPT_BRTR = {'Accept' => RESULT_BRTR}.freeze SPARQL_CACHE_QUERIES = "sparql:queries" @@ -35,9 +40,9 @@ class ServerError < StandardError; end DEFAULT_METHOD = :post ## - # The SPARQL endpoint URL. + # The SPARQL endpoint URL, or an RDF::Queryable instance, to use the native SPARQL engine. # - # @return [RDF::URI] + # @return [RDF::URI, RDF::Queryable] attr_reader :url ## @@ -53,7 +58,12 @@ class ServerError < StandardError; end attr_reader :options ## - # @param [String, #to_s] url + # Initialize a new sparql client, either using the URL of + # a SPARQL endpoint or an `RDF::Queryable` instance to use + # the native SPARQL gem. + # + # @param [String, RDF::Queryable, #to_s] url + # URL of endpoint, or queryable object. # @param [Hash{Symbol => Object}] options # @option options [Symbol] :method (DEFAULT_METHOD) # @option options [Number] :protocol (DEFAULT_PROTOCOL) @@ -64,15 +74,20 @@ def initialize(url, options = {}, &block) if options[:redis_cache] @redis_cache = options[:redis_cache] end - @url, @options = RDF::URI.new(url.to_s), options.dup - #@headers = { - # 'Accept' => [RESULT_JSON, RESULT_XML, RDF::Format.content_types.keys.map(&:to_s)].join(', ') - #}.merge(@options.delete(:headers) || {}) - @headers = { - 'Accept' => RESULT_JSON.to_s - #'Accept' => RESULT_XML.to_s - }.merge(@options.delete(:headers) || {}) - @http = http_klass(@url.scheme) + case url + when RDF::Queryable + @url, @options = url, options.dup + else + @url, @options = RDF::URI.new(url.to_s), options.dup +# @headers = { +# 'Accept' => [RESULT_JSON, RESULT_XML, "#{RESULT_TSV};p=0.8", "#{RESULT_CSV};p=0.2", RDF::Format.content_types.keys.map(&:to_s)].join(', ') +# }.merge(@options.delete(:headers) || {}) + @headers = { + 'Accept' => RESULT_JSON.to_s + #'Accept' => RESULT_XML.to_s + }.merge(@options.delete(:headers) || {}) + @http = http_klass(@url.scheme) + end if block_given? case block.arity @@ -203,10 +218,19 @@ def clear_graph(graph_uri, options = {}) # @example `CLEAR ALL` # client.clear(:all) # - # @param [Symbol, #to_sym] what - # @param [Hash{Symbol => Object}] options - # @option options [Boolean] :silent - # @return [void] `self` + # @overload clear(what, *arguments) + # @param [Symbol, #to_sym] what + # @param [Array] arguments splat of other arguments to {Update::Clear}. + # @option options [Boolean] :silent + # @return [void] `self` + # + # @overload clear(what, *arguments, options = {}) + # @param [Symbol, #to_sym] what + # @param [Array] arguments splat of other arguments to {Update::Clear}. + # @param [Hash{Symbol => Object}] options + # @option options [Boolean] :silent + # @return [void] `self` + # # @see http://www.w3.org/TR/sparql11-update/#clear def clear(what, *arguments) self.update(Update::Clear.new(what, *arguments)) @@ -261,7 +285,13 @@ def query(query, options = {}) return parsed end @op = :query - parse_response(response(query, options), options) + case @url + when RDF::Queryable + require 'sparql' unless defined?(::SPARQL::Grammar) + SPARQL.execute(query, @url, options) + else + parse_response(response(query, options), options) + end end ## @@ -279,7 +309,13 @@ def update(query, options = {}) if @redis_cache query_delete_cache(query) end - parse_response(response(query, options), options) + case @url + when RDF::Queryable + require 'sparql' unless defined?(::SPARQL::Grammar) + SPARQL.execute(query, @url, options) + else + parse_response(response(query, options), options) + end self end @@ -380,6 +416,10 @@ def parse_response(response, options = {}) when RESULT_XML #self.class.parse_xml_nokiri(response.body, nodes) self.class.parse_xml_bindings(response.body, nodes) + when RESULT_CSV + self.class.parse_csv_bindings(response.body, nodes) + when RESULT_TSV + self.class.parse_tsv_bindings(response.body, nodes) else parse_rdf_serialization(response, options) end @@ -429,6 +469,56 @@ def self.parse_json_value(value, nodes = {}) end end + ## + # @param [String, Array>] csv + # @return [] + # @see http://www.w3.org/TR/sparql11-results-csv-tsv/ + def self.parse_csv_bindings(csv, nodes = {}) + require 'csv' unless defined?(::CSV) + csv = CSV.parse(csv.to_s) unless csv.is_a?(Array) + vars = csv.shift + solutions = RDF::Query::Solutions.new + csv.each do |row| + solution = RDF::Query::Solution.new + row.each_with_index do |v, i| + term = case v + when /^_:(.*)$/ then nodes[$1] ||= RDF::Node($1) + when /^\w+:.*$/ then RDF::URI(v) + else RDF::Literal(v) + end + solution[vars[i].to_sym] = term + end + solutions << solution + end + solutions + end + + ## + # @param [String, Array>] tsv + # @return [] + # @see http://www.w3.org/TR/sparql11-results-csv-tsv/ + def self.parse_tsv_bindings(tsv, nodes = {}) + tsv = tsv.lines.map {|l| l.chomp.split("\t")} unless tsv.is_a?(Array) + vars = tsv.shift.map {|h| h.sub(/^\?/, '')} + solutions = RDF::Query::Solutions.new + tsv.each do |row| + solution = RDF::Query::Solution.new + row.each_with_index do |v, i| + term = RDF::NTriples.unserialize(v) || case v + when /^\d+\.\d*[eE][+-]?[0-9]+$/ then RDF::Literal::Double.new(v) + when /^\d*\.\d+[eE][+-]?[0-9]+$/ then RDF::Literal::Double.new(v) + when /^\d*\.\d+$/ then RDF::Literal::Decimal.new(v) + when /^\d+$/ then RDF::Literal::Integer.new(v) + else + RDF::Literal(v) + end + solution[vars[i].to_sym] = term + end + solutions << solution + end + solutions + end + ## # @param [String, REXML::Element] xml # @return [] @@ -617,7 +707,7 @@ def make_post_request(query, headers = {}, op = :query, query_options = nil) request.body = query.to_s end when '1.0' - request.set_form_data(:query => query.to_s) + request.set_form_data((@op || :query) => query.to_s) else raise ArgumentError, "unknown SPARQL protocol version: #{self.options[:protocol].inspect}" end diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index bc663b6a..4f626614 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -135,7 +135,9 @@ def from(uri) self end - # @param [Array] patterns + ## + # @param [Array] patterns_queries + # splat of zero or more patterns followed by zero or more queries. # @return [Query] # @see http://www.w3.org/TR/sparql11-query/#GraphPattern def where(*patterns_queries) @@ -294,6 +296,7 @@ def filter(string) def true? case result when TrueClass, FalseClass then result + when RDF::Literal::Boolean then result.true? when Enumerable then !result.empty? else false end From 1f74d0bad25241417415c47363911d9d77469316 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Sun, 8 Sep 2013 23:20:46 -0700 Subject: [PATCH 19/75] bypass cache options --- lib/sparql/client.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 02af13b8..1ff49a47 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -266,7 +266,7 @@ def nodes # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) #TODO less intrusive ? - if @redis_cache && query.instance_of?(SPARQL::Client::Query) + if @redis_cache && !query.options[:bypass_cache] && query.instance_of?(SPARQL::Client::Query) cache_response = @redis_cache.get(query.cache_key[:query]) if cache_response return Marshal.load(cache_response) @@ -306,7 +306,7 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update - if @redis_cache + if @redis_cache && !query.options[:bypass_cache] query_delete_cache(query) end case @url From ece2cbe505d24072bcf0e20a8c17e7aba66a8b50 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 9 Sep 2013 09:55:12 -0700 Subject: [PATCH 20/75] support String query caching, graphs need to be in options --- lib/sparql/client.rb | 20 ++++++++++++++------ lib/sparql/client/query.rb | 7 ++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 1ff49a47..b9c2e991 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -266,12 +266,20 @@ def nodes # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) #TODO less intrusive ? - if @redis_cache && !query.options[:bypass_cache] && query.instance_of?(SPARQL::Client::Query) - cache_response = @redis_cache.get(query.cache_key[:query]) - if cache_response - return Marshal.load(cache_response) + unless query.respond_to?(:options) && query.options[:bypass_cache] + if @redis_cache && (query.instance_of?(SPARQL::Client::Query) || options[:graphs]) + cache_key = nil + if options[:graphs] + cache_key = SPARQL::Client::Query.generate_cache_key(query,options[:graphs]) + else + cache_key = query.cache_key + end + cache_response = @redis_cache.get(cache_key[:query]) + if cache_response + return Marshal.load(cache_response) + end + options[:cache_key] = cache_key end - options[:cache_key] = query.cache_key end if Thread.current[:ncbo_debug] @op = :query @@ -356,7 +364,7 @@ def query_delete_cache(update) def cache_invalidate_graph(graphs) return if @redis_cache.nil? - graphs = [graphs] unless graph.instance_of?(Array) + graphs = [graphs] unless graphs.instance_of?(Array) graphs.each do |graph| graph = "sparql:graph:#{graph.to_s}" if @redis_cache.exists(graph) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 4f626614..006eb45f 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -262,13 +262,18 @@ def union_with_bind_as(*pattern_list) self end + def cache_key return nil if options[:from].nil? || options[:from].empty? from = options[:from] from = [from] unless from.instance_of?(Array) + return Query.generate_cache_key(self.to_s,from) + end + + def self.generate_cache_key(string,from) from = from.map { |x| x.to_s }.uniq.sort sorted_graphs = from.join ":" - digest = Digest::MD5.hexdigest(self.to_s) + digest = Digest::MD5.hexdigest(string) from = from.map { |x| "sparql:graph:#{x}" } return { graphs: from, query: "sparql:#{sorted_graphs}:#{digest}" } end From 340c539c236fbed8adf35b1bd35cf62053abbf25 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 16 Sep 2013 16:46:53 -0700 Subject: [PATCH 21/75] helper to flush goo cache --- Gemfile | 1 + lib/sparql/client.rb | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Gemfile b/Gemfile index b2cefe70..18510df1 100644 --- a/Gemfile +++ b/Gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gemspec :name => "" gem "jruby-openssl", :platforms => :jruby +gem 'cube-ruby' group :debug do gem 'shotgun' diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index b9c2e991..61c81409 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -391,6 +391,26 @@ def cache_invalidate_graph(graphs) end end + def cache_invalidate_all + return if @redis_cache.nil? + to_delete = [] + all_queries = @redis_cache.smembers(SPARQL_CACHE_QUERIES) + all_graphs = @redis_cache.smembers(SPARQL_CACHE_GRAPHS) + @redis_cache.del(SPARQL_CACHE_QUERIES) + @redis_cache.del(SPARQL_CACHE_GRAPHS) + + puts "deleting #{all_queries.length} query entries" + all_queries.each_slice(500_000) do |query_keys| + @redis_cache.del query_keys + end + puts "deleting #{all_graphs.length} graph entries" + all_graphs.each_slice(500_000) do |query_graphs| + @redis_cache.del query_graphs + end + puts "done with the cache deletion" + end + + def query_put_cache(keys,entry) expiration = 86400 #1 day if defined?(SPARQL_CACHE_EXPIRATION_TIME) From 63172971cf97c32f28ae730a81c33c9c4951b0a9 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Thu, 19 Sep 2013 12:17:10 -0700 Subject: [PATCH 22/75] hack for stats call that return millions of rows --- lib/sparql/client.rb | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 61c81409..2e8a3c81 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -284,10 +284,33 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] @op = :query qstart = Time.now + if query.to_s["strbefore"] + options[:content_type] = "text/plain" + end r = response(query, options) query_time = Time.now - qstart pstart = Time.now - parsed = parse_response(r, options) + parsed = nil + if !query.to_s["strbefore"] + parsed = parse_response(r, options) + else + if r.body + line = 0 + parsed = {} + r.body.split("\n").each do |x| + line += 1 + next if line == 1 + acr = x[1..-2] + unless parsed.include?(acr) + parsed[acr] = 0 + end + parsed[acr] += 1 + end + query_put_cache(options[:cache_key],parsed) + else + raise Exception, "SPARQL: error returning mapping stats" + end + end parse_time = Time.now - pstart (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] return parsed From 03a8245200175ee24a93dc0d1a4afeeaa8ce2079 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Thu, 19 Sep 2013 16:39:46 -0700 Subject: [PATCH 23/75] hack for stats without ncbo_debug --- lib/sparql/client.rb | 73 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 2e8a3c81..19cb62a3 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -281,48 +281,49 @@ def query(query, options = {}) options[:cache_key] = cache_key end end - if Thread.current[:ncbo_debug] - @op = :query - qstart = Time.now - if query.to_s["strbefore"] - options[:content_type] = "text/plain" - end - r = response(query, options) - query_time = Time.now - qstart - pstart = Time.now - parsed = nil - if !query.to_s["strbefore"] - parsed = parse_response(r, options) - else - if r.body - line = 0 - parsed = {} - r.body.split("\n").each do |x| - line += 1 - next if line == 1 - acr = x[1..-2] - unless parsed.include?(acr) - parsed[acr] = 0 - end - parsed[acr] += 1 + @op = :query + qstart = Time.now + if query.to_s["strbefore"] + options[:content_type] = "text/plain" + end + r = response(query, options) + query_time = Time.now - qstart + pstart = Time.now + parsed = parse_response(r, options) + if !query.to_s["strbefore"] + else + if r.body + line = 0 + parsed = {} + r.body.split("\n").each do |x| + line += 1 + next if line == 1 + acr = x[1..-2] + unless parsed.include?(acr) + parsed[acr] = 0 end + parsed[acr] += 1 + end + if options[:cache_key] query_put_cache(options[:cache_key],parsed) - else - raise Exception, "SPARQL: error returning mapping stats" end + else + raise Exception, "SPARQL: error returning mapping stats" end - parse_time = Time.now - pstart - (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] - return parsed end - @op = :query - case @url - when RDF::Queryable - require 'sparql' unless defined?(::SPARQL::Grammar) - SPARQL.execute(query, @url, options) - else - parse_response(response(query, options), options) + parse_time = Time.now - pstart + if Thread.current[:ncbo_debug] + (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end + return parsed + #@op = :query + #case @url + #when RDF::Queryable + # require 'sparql' unless defined?(::SPARQL::Grammar) + # SPARQL.execute(query, @url, options) + #else + # parse_response(response(query, options), options) + #end end ## From b63c18290bad1250872b8709513a1068886cb43e Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 24 Sep 2013 14:15:42 -0700 Subject: [PATCH 24/75] cube support for cache and queries --- Gemfile | 2 +- lib/sparql/client.rb | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/Gemfile b/Gemfile index 18510df1..8e9d2919 100644 --- a/Gemfile +++ b/Gemfile @@ -3,7 +3,7 @@ source "https://rubygems.org" gemspec :name => "" gem "jruby-openssl", :platforms => :jruby -gem 'cube-ruby' +gem 'cube-ruby', require: "cube" group :debug do gem 'shotgun' diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 19cb62a3..746e6384 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -2,6 +2,7 @@ require 'rdf' # @see http://rubygems.org/gems/rdf require 'rdf/ntriples' # @see http://rubygems.org/gems/rdf require 'json' +require 'cube' module SPARQL ## @@ -74,6 +75,10 @@ def initialize(url, options = {}, &block) if options[:redis_cache] @redis_cache = options[:redis_cache] end + @cube = nil + if options[:cube_options] + cube_options=options[:cube_options] + end case url when RDF::Queryable @url, @options = url, options.dup @@ -266,6 +271,7 @@ def nodes # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) #TODO less intrusive ? + start = Time.now unless query.respond_to?(:options) && query.options[:bypass_cache] if @redis_cache && (query.instance_of?(SPARQL::Client::Query) || options[:graphs]) cache_key = nil @@ -276,6 +282,10 @@ def query(query, options = {}) end cache_response = @redis_cache.get(cache_key[:query]) if cache_response + if @cube + @cube.send("goo_cache_hit", DateTime.now, + duration_ms: ((Time.now - start)*1000).ceil) rescue nil + end return Marshal.load(cache_response) end options[:cache_key] = cache_key @@ -315,6 +325,11 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end + if @cube + @cube.send("goo_query_hit", DateTime.now, + duration_ms: ((Time.now - start)*1000).ceil, + query: query.to_s) rescue nil + end return parsed #@op = :query #case @url @@ -346,7 +361,13 @@ def update(query, options = {}) require 'sparql' unless defined?(::SPARQL::Grammar) SPARQL.execute(query, @url, options) else + start = Time.now parse_response(response(query, options), options) + if @cube + @cube.send("sparql_write_data", DateTime.now, + duration_ms: ((Time.now - start)*1000).ceil, + type_write: query.class.name.split("::")[-1].downcase) rescue nil + end end self end @@ -462,7 +483,9 @@ def parse_response(response, options = {}) when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) if options[:cache_key] - query_put_cache(options[:cache_key],result_data) + if options[:cache_key] + query_put_cache(options[:cache_key],result_data) + end end return result_data when RESULT_XML @@ -676,6 +699,16 @@ def redis_cache=(redis_cache) @redis_cache = redis_cache end + def cube_options=(cube_options) + if cube_options + cube_host = cube_options[:host] || "localhost" + cube_port = cube_options[:port] || 1180 + @cube = Cube::Client.new(cube_host, cube_port) + else + @cube = nil + end + end + protected ## From b436e9b11ef2a2a337b19d9e9e10486028811e1b Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 24 Sep 2013 15:54:47 -0700 Subject: [PATCH 25/75] query options for strings --- lib/sparql/client.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 746e6384..31901321 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -385,6 +385,9 @@ def response(query, options = {}) op = options[:op] || :query headers = options[:headers] || {} query_options = (query.is_a?(Query) && query.options[:query_options]) || nil + unless query_options + query_options = (query.is_a?(String) && options[:query_options]) || nil + end headers['Accept'] = options[:content_type] if options[:content_type] request(query,op,headers,query_options) do |response| case response From 4089cb9f6391e1849f4d41075d1331b7a117079e Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 30 Sep 2013 12:06:31 -0700 Subject: [PATCH 26/75] not renaming keys in cache - direct delete --- lib/sparql/client.rb | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 31901321..7f8824e1 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -414,23 +414,16 @@ def cache_invalidate_graph(graphs) return if @redis_cache.nil? graphs = [graphs] unless graphs.instance_of?(Array) graphs.each do |graph| - graph = "sparql:graph:#{graph.to_s}" + graph = graph.to_s + graph = "sparql:graph:#{graph}" if graph.start_with?("sparql:graph:") if @redis_cache.exists(graph) begin - #invalidate all the entries - #better rename+short expire than delete query_entries = @redis_cache.smembers(graph) @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) query_entries.each do |e| - if @redis_cache.exists(e) - @redis_cache.rename e, "tmp:#{e}" - @redis_cache.expire("tmp:#{e}",2) - end - end - if @redis_cache.exists(graph) - @redis_cache.rename graph, "tmp:#{graph}" - @redis_cache.expire("tmp:#{graph}",2) + @redis_cache.del(e) end + @redis_cache.del(graph) rescue => exception puts "warning: error in cache invalidation `#{exception}`" puts exception.backtrace From f52d6e8845d0a14fd4b3b59d743c83310e1e4b2e Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 30 Sep 2013 12:48:03 -0700 Subject: [PATCH 27/75] longer expiration - no we have LRU expiration for volatile data in the redis conf --- lib/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 7f8824e1..2185bd26 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -453,7 +453,7 @@ def cache_invalidate_all def query_put_cache(keys,entry) - expiration = 86400 #1 day + expiration = 5 * 86400 #5 day if defined?(SPARQL_CACHE_EXPIRATION_TIME) expiration = SPARQL_CACHE_EXPIRATION_TIME end From 6114aa9e040ac707bdfde8414fa03b7fc5d46121 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 30 Sep 2013 13:00:09 -0700 Subject: [PATCH 28/75] reverse logic for prefix graph name --- lib/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 2185bd26..059c4bcb 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -415,7 +415,7 @@ def cache_invalidate_graph(graphs) graphs = [graphs] unless graphs.instance_of?(Array) graphs.each do |graph| graph = graph.to_s - graph = "sparql:graph:#{graph}" if graph.start_with?("sparql:graph:") + graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") if @redis_cache.exists(graph) begin query_entries = @redis_cache.smembers(graph) From 1c7f9a8ef20bd468a56fa3c2498e6400d61e502d Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 1 Oct 2013 15:27:15 -0700 Subject: [PATCH 29/75] safer json parsing for unicode --- lib/sparql/client.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 059c4bcb..b4a6d1ab 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -501,8 +501,13 @@ def parse_response(response, options = {}) # @return [] # @see http://www.w3.org/TR/rdf-sparql-json-res/#results def self.parse_json_bindings(json, nodes = {}) - json = json.force_encoding(::Encoding::UTF_8) if json.respond_to?(:force_encoding) - json = JSON.parse(json.to_s) unless json.is_a?(Hash) + begin + json = JSON.parse(json.to_s) unless json.is_a?(Hash) + rescue JSON::ParserError => e + #retry only if parse error + json = json.encode(::Encoding::UTF_8,:invalid => :replace, :undef => :replace, :replace => "?") + json = JSON.parse(json.to_s) unless json.is_a?(Hash) + end case when json.has_key?('boolean') json['boolean'] From 30fd412e1adec775d9069be2da586ffc876f1507 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 1 Oct 2013 17:12:08 -0700 Subject: [PATCH 30/75] remove control characters if exception in parsing JSON --- lib/sparql/client.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index b4a6d1ab..66343399 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -501,11 +501,11 @@ def parse_response(response, options = {}) # @return [] # @see http://www.w3.org/TR/rdf-sparql-json-res/#results def self.parse_json_bindings(json, nodes = {}) + json = json.force_encoding(::Encoding::UTF_8) if json.respond_to?(:force_encoding) begin json = JSON.parse(json.to_s) unless json.is_a?(Hash) - rescue JSON::ParserError => e - #retry only if parse error - json = json.encode(::Encoding::UTF_8,:invalid => :replace, :undef => :replace, :replace => "?") + rescue Exception => e + json = json.split("").select { |x| x.ord > 31 }.join '' json = JSON.parse(json.to_s) unless json.is_a?(Hash) end case From f26705cf748aa5ac4641adffacc3a89e4ca4e0fd Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 4 Oct 2013 17:44:45 -0700 Subject: [PATCH 31/75] fallback to graph entry for cheking the cache --- lib/sparql/client.rb | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 66343399..39de974a 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -282,11 +282,20 @@ def query(query, options = {}) end cache_response = @redis_cache.get(cache_key[:query]) if cache_response - if @cube - @cube.send("goo_cache_hit", DateTime.now, - duration_ms: ((Time.now - start)*1000).ceil) rescue nil + cache_key[:graphs].each do |g| + unless @redis_cache.sismember(g,cache_key[:query]) + @redis_cache.del(cache_key[:query]) + cache_response = nil + break + end + end + if cache_response + if @cube + @cube.send("goo_cache_hit", DateTime.now, + duration_ms: ((Time.now - start)*1000).ceil) rescue nil + end + return Marshal.load(cache_response) end - return Marshal.load(cache_response) end options[:cache_key] = cache_key end From de82689e690f400833653b0ecc51676396e945a8 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 4 Oct 2013 17:45:22 -0700 Subject: [PATCH 32/75] retries in invalidate cache --- lib/sparql/client.rb | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 39de974a..d16e6cd8 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -423,19 +423,28 @@ def cache_invalidate_graph(graphs) return if @redis_cache.nil? graphs = [graphs] unless graphs.instance_of?(Array) graphs.each do |graph| - graph = graph.to_s - graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") - if @redis_cache.exists(graph) - begin - query_entries = @redis_cache.smembers(graph) - @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) - query_entries.each do |e| - @redis_cache.del(e) + attempts = 0 + begin + graph = graph.to_s + graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") + if @redis_cache.exists(graph) + begin + query_entries = @redis_cache.smembers(graph) + @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) + query_entries.each do |e| + @redis_cache.del(e) + end + @redis_cache.del(graph) + rescue => exception + puts "warning: error in cache invalidation `#{exception}`" + puts exception.backtrace end - @redis_cache.del(graph) - rescue => exception - puts "warning: error in cache invalidation `#{exception}`" - puts exception.backtrace + end + rescue Exception => e + if attempts < 3 + attempts += 1 + sleep(5) + retry end end end From 972f264109779a7522552cda0325755dbc004edc Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 5 Nov 2013 11:51:32 -0800 Subject: [PATCH 33/75] potential fix for NCBO-368 --- lib/sparql/client.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index d16e6cd8..42b32cec 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -767,6 +767,8 @@ def request(query, headers = {}, op = :query, query_options = nil, &block) request.basic_auth(url.user, url.password) if url.user && !url.user.empty? + @http.open_timeout = @http.read_timeout + @http.idle_timeout = nil response = @http.request(url, request) if block_given? block.call(response) From 1dc17f0bd9966b3a4d93d5d9ccc1b2d8f0cd4e8e Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 19 Nov 2013 12:17:34 -0800 Subject: [PATCH 34/75] simpler cache mechanism - removed root keys --- lib/sparql/client.rb | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 42b32cec..d7b61769 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -34,9 +34,6 @@ class ServerError < StandardError; end ACCEPT_TSV = {'Accept' => RESULT_TSV}.freeze ACCEPT_BRTR = {'Accept' => RESULT_BRTR}.freeze - SPARQL_CACHE_QUERIES = "sparql:queries" - SPARQL_CACHE_GRAPHS = "sparql:graphs" - DEFAULT_PROTOCOL = 1.0 DEFAULT_METHOD = :post @@ -429,11 +426,6 @@ def cache_invalidate_graph(graphs) graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") if @redis_cache.exists(graph) begin - query_entries = @redis_cache.smembers(graph) - @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) - query_entries.each do |e| - @redis_cache.del(e) - end @redis_cache.del(graph) rescue => exception puts "warning: error in cache invalidation `#{exception}`" @@ -450,38 +442,13 @@ def cache_invalidate_graph(graphs) end end - def cache_invalidate_all - return if @redis_cache.nil? - to_delete = [] - all_queries = @redis_cache.smembers(SPARQL_CACHE_QUERIES) - all_graphs = @redis_cache.smembers(SPARQL_CACHE_GRAPHS) - @redis_cache.del(SPARQL_CACHE_QUERIES) - @redis_cache.del(SPARQL_CACHE_GRAPHS) - - puts "deleting #{all_queries.length} query entries" - all_queries.each_slice(500_000) do |query_keys| - @redis_cache.del query_keys - end - puts "deleting #{all_graphs.length} graph entries" - all_graphs.each_slice(500_000) do |query_graphs| - @redis_cache.del query_graphs - end - puts "done with the cache deletion" - end - - def query_put_cache(keys,entry) - expiration = 5 * 86400 #5 day - if defined?(SPARQL_CACHE_EXPIRATION_TIME) - expiration = SPARQL_CACHE_EXPIRATION_TIME - end + expiration = 1 * 86400 #1 day @redis_cache.multi do keys[:graphs].each do |g| @redis_cache.sadd(g,keys[:query]) - @redis_cache.sadd(SPARQL_CACHE_GRAPHS,g) end @redis_cache.set(keys[:query],Marshal.dump(entry)) - @redis_cache.sadd(SPARQL_CACHE_QUERIES,keys[:query]) @redis_cache.expire(keys[:query],expiration) end end From e1dfb143a3097066f81bfe13393ed93ec01c37ad Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 19 Nov 2013 12:17:34 -0800 Subject: [PATCH 35/75] simpler cache mechanism - removed root keys --- lib/sparql/client.rb | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 42b32cec..d7b61769 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -34,9 +34,6 @@ class ServerError < StandardError; end ACCEPT_TSV = {'Accept' => RESULT_TSV}.freeze ACCEPT_BRTR = {'Accept' => RESULT_BRTR}.freeze - SPARQL_CACHE_QUERIES = "sparql:queries" - SPARQL_CACHE_GRAPHS = "sparql:graphs" - DEFAULT_PROTOCOL = 1.0 DEFAULT_METHOD = :post @@ -429,11 +426,6 @@ def cache_invalidate_graph(graphs) graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") if @redis_cache.exists(graph) begin - query_entries = @redis_cache.smembers(graph) - @redis_cache.srem(SPARQL_CACHE_QUERIES,query_entries) - query_entries.each do |e| - @redis_cache.del(e) - end @redis_cache.del(graph) rescue => exception puts "warning: error in cache invalidation `#{exception}`" @@ -450,38 +442,13 @@ def cache_invalidate_graph(graphs) end end - def cache_invalidate_all - return if @redis_cache.nil? - to_delete = [] - all_queries = @redis_cache.smembers(SPARQL_CACHE_QUERIES) - all_graphs = @redis_cache.smembers(SPARQL_CACHE_GRAPHS) - @redis_cache.del(SPARQL_CACHE_QUERIES) - @redis_cache.del(SPARQL_CACHE_GRAPHS) - - puts "deleting #{all_queries.length} query entries" - all_queries.each_slice(500_000) do |query_keys| - @redis_cache.del query_keys - end - puts "deleting #{all_graphs.length} graph entries" - all_graphs.each_slice(500_000) do |query_graphs| - @redis_cache.del query_graphs - end - puts "done with the cache deletion" - end - - def query_put_cache(keys,entry) - expiration = 5 * 86400 #5 day - if defined?(SPARQL_CACHE_EXPIRATION_TIME) - expiration = SPARQL_CACHE_EXPIRATION_TIME - end + expiration = 1 * 86400 #1 day @redis_cache.multi do keys[:graphs].each do |g| @redis_cache.sadd(g,keys[:query]) - @redis_cache.sadd(SPARQL_CACHE_GRAPHS,g) end @redis_cache.set(keys[:query],Marshal.dump(entry)) - @redis_cache.sadd(SPARQL_CACHE_QUERIES,keys[:query]) @redis_cache.expire(keys[:query],expiration) end end From a5ddcaa6a4997aaf740376c34c5f1000530862fd Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 7 Feb 2014 17:06:28 -0800 Subject: [PATCH 36/75] NCBO-228: remove hack for mapping counts --- lib/sparql/client.rb | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index d7b61769..97461648 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -299,34 +299,10 @@ def query(query, options = {}) end @op = :query qstart = Time.now - if query.to_s["strbefore"] - options[:content_type] = "text/plain" - end r = response(query, options) query_time = Time.now - qstart pstart = Time.now parsed = parse_response(r, options) - if !query.to_s["strbefore"] - else - if r.body - line = 0 - parsed = {} - r.body.split("\n").each do |x| - line += 1 - next if line == 1 - acr = x[1..-2] - unless parsed.include?(acr) - parsed[acr] = 0 - end - parsed[acr] += 1 - end - if options[:cache_key] - query_put_cache(options[:cache_key],parsed) - end - else - raise Exception, "SPARQL: error returning mapping stats" - end - end parse_time = Time.now - pstart if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] From e6a2d6b4fda4e23d77a70c440f058bd08c801e26 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 10 Feb 2014 16:38:00 -0800 Subject: [PATCH 37/75] as an altenative use graphs from query to build the cache id --- lib/sparql/client.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 97461648..54951d46 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -272,8 +272,9 @@ def query(query, options = {}) unless query.respond_to?(:options) && query.options[:bypass_cache] if @redis_cache && (query.instance_of?(SPARQL::Client::Query) || options[:graphs]) cache_key = nil - if options[:graphs] - cache_key = SPARQL::Client::Query.generate_cache_key(query,options[:graphs]) + if options[:graphs] || query.options[:graphs] + cache_key = SPARQL::Client::Query.generate_cache_key(query.to_s, + options[:graphs] || query.options[:graphs]) else cache_key = query.cache_key end From 106fa6f32c88b0845037a551705c09dd782f7de0 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 22 Apr 2014 10:31:10 -0700 Subject: [PATCH 38/75] NCBO-699 no parsing of text plain in bindings --- lib/sparql/client.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 54951d46..311f6a54 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -26,6 +26,7 @@ class ServerError < StandardError; end RESULT_XML = 'application/sparql-results+xml'.freeze RESULT_CSV = 'text/csv'.freeze RESULT_TSV = 'text/tab-separated-values'.freeze + RESULT_PLAIN = 'text/plain'.freeze RESULT_BOOL = 'text/boolean'.freeze # Sesame-specific RESULT_BRTR = 'application/x-binary-rdf-results-table'.freeze # Sesame-specific ACCEPT_JSON = {'Accept' => RESULT_JSON}.freeze @@ -453,6 +454,8 @@ def parse_response(response, options = {}) self.class.parse_csv_bindings(response.body, nodes) when RESULT_TSV self.class.parse_tsv_bindings(response.body, nodes) + when RESULT_PLAIN + self.class.parse_plain_bindings(response.body, nodes) else parse_rdf_serialization(response, options) end @@ -557,6 +560,10 @@ def self.parse_tsv_bindings(tsv, nodes = {}) solutions end + def self.parse_plain_bindings(plain, nodes = {}) + return plain + end + ## # @param [String, REXML::Element] xml # @return [] From 1cb3adaf2e47d1bc7189fc21e3395343276c488e Mon Sep 17 00:00:00 2001 From: msalvadores Date: Mon, 5 May 2014 17:51:53 -0700 Subject: [PATCH 39/75] NCBO-649 remove pipeline in flush --- lib/sparql/client.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 311f6a54..56322b25 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -422,13 +422,11 @@ def cache_invalidate_graph(graphs) def query_put_cache(keys,entry) expiration = 1 * 86400 #1 day - @redis_cache.multi do - keys[:graphs].each do |g| - @redis_cache.sadd(g,keys[:query]) - end - @redis_cache.set(keys[:query],Marshal.dump(entry)) - @redis_cache.expire(keys[:query],expiration) + keys[:graphs].each do |g| + @redis_cache.sadd(g,keys[:query]) end + @redis_cache.set(keys[:query],Marshal.dump(entry)) + @redis_cache.expire(keys[:query],expiration) end ## From 604ddcc543461ff1d9e4807a3cb1a2c3eecfe531 Mon Sep 17 00:00:00 2001 From: Paul R Alexander Date: Thu, 3 Jul 2014 10:42:50 -0700 Subject: [PATCH 40/75] Check to make sure query has options present --- lib/sparql/client.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 56322b25..cc13fdc1 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -289,8 +289,8 @@ def query(query, options = {}) end end if cache_response - if @cube - @cube.send("goo_cache_hit", DateTime.now, + if @cube + @cube.send("goo_cache_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil) rescue nil end return Marshal.load(cache_response) @@ -309,8 +309,8 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, + if @cube + @cube.send("goo_query_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, query: query.to_s) rescue nil end @@ -337,8 +337,8 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update - if @redis_cache && !query.options[:bypass_cache] - query_delete_cache(query) + if @redis_cache && (!query.respond_to?(:options) || !query.options[:bypass_cache]) + query_delete_cache(query) end case @url when RDF::Queryable @@ -347,8 +347,8 @@ def update(query, options = {}) else start = Time.now parse_response(response(query, options), options) - if @cube - @cube.send("sparql_write_data", DateTime.now, + if @cube + @cube.send("sparql_write_data", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, type_write: query.class.name.split("::")[-1].downcase) rescue nil end @@ -439,7 +439,7 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - if options[:cache_key] + if options[:cache_key] if options[:cache_key] query_put_cache(options[:cache_key],result_data) end From 1438caf1e52623ea1dc992f02abb7ff2c94bc4bb Mon Sep 17 00:00:00 2001 From: Paul R Alexander Date: Thu, 3 Jul 2014 10:42:50 -0700 Subject: [PATCH 41/75] Check to make sure query has options present --- lib/sparql/client.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 56322b25..cc13fdc1 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -289,8 +289,8 @@ def query(query, options = {}) end end if cache_response - if @cube - @cube.send("goo_cache_hit", DateTime.now, + if @cube + @cube.send("goo_cache_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil) rescue nil end return Marshal.load(cache_response) @@ -309,8 +309,8 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, + if @cube + @cube.send("goo_query_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, query: query.to_s) rescue nil end @@ -337,8 +337,8 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update - if @redis_cache && !query.options[:bypass_cache] - query_delete_cache(query) + if @redis_cache && (!query.respond_to?(:options) || !query.options[:bypass_cache]) + query_delete_cache(query) end case @url when RDF::Queryable @@ -347,8 +347,8 @@ def update(query, options = {}) else start = Time.now parse_response(response(query, options), options) - if @cube - @cube.send("sparql_write_data", DateTime.now, + if @cube + @cube.send("sparql_write_data", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, type_write: query.class.name.split("::")[-1].downcase) rescue nil end @@ -439,7 +439,7 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - if options[:cache_key] + if options[:cache_key] if options[:cache_key] query_put_cache(options[:cache_key],result_data) end From 1c1703bc56d16d749b5191537493de8cbc3f6c31 Mon Sep 17 00:00:00 2001 From: Paul R Alexander Date: Thu, 3 Jul 2014 17:17:08 -0700 Subject: [PATCH 42/75] Revert "Check to make sure query has options present" This reverts commit 1438caf1e52623ea1dc992f02abb7ff2c94bc4bb. --- lib/sparql/client.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index cc13fdc1..56322b25 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -289,8 +289,8 @@ def query(query, options = {}) end end if cache_response - if @cube - @cube.send("goo_cache_hit", DateTime.now, + if @cube + @cube.send("goo_cache_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil) rescue nil end return Marshal.load(cache_response) @@ -309,8 +309,8 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, + if @cube + @cube.send("goo_query_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, query: query.to_s) rescue nil end @@ -337,8 +337,8 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update - if @redis_cache && (!query.respond_to?(:options) || !query.options[:bypass_cache]) - query_delete_cache(query) + if @redis_cache && !query.options[:bypass_cache] + query_delete_cache(query) end case @url when RDF::Queryable @@ -347,8 +347,8 @@ def update(query, options = {}) else start = Time.now parse_response(response(query, options), options) - if @cube - @cube.send("sparql_write_data", DateTime.now, + if @cube + @cube.send("sparql_write_data", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, type_write: query.class.name.split("::")[-1].downcase) rescue nil end @@ -439,7 +439,7 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - if options[:cache_key] + if options[:cache_key] if options[:cache_key] query_put_cache(options[:cache_key],result_data) end From fff5f36779bb630652922e5c4b5a558f026e0e2d Mon Sep 17 00:00:00 2001 From: Paul R Alexander Date: Thu, 3 Jul 2014 17:17:17 -0700 Subject: [PATCH 43/75] Revert "Check to make sure query has options present" This reverts commit 604ddcc543461ff1d9e4807a3cb1a2c3eecfe531. --- lib/sparql/client.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index cc13fdc1..56322b25 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -289,8 +289,8 @@ def query(query, options = {}) end end if cache_response - if @cube - @cube.send("goo_cache_hit", DateTime.now, + if @cube + @cube.send("goo_cache_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil) rescue nil end return Marshal.load(cache_response) @@ -309,8 +309,8 @@ def query(query, options = {}) if Thread.current[:ncbo_debug] (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, + if @cube + @cube.send("goo_query_hit", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, query: query.to_s) rescue nil end @@ -337,8 +337,8 @@ def query(query, options = {}) def update(query, options = {}) @op = :update options[:op] = :update - if @redis_cache && (!query.respond_to?(:options) || !query.options[:bypass_cache]) - query_delete_cache(query) + if @redis_cache && !query.options[:bypass_cache] + query_delete_cache(query) end case @url when RDF::Queryable @@ -347,8 +347,8 @@ def update(query, options = {}) else start = Time.now parse_response(response(query, options), options) - if @cube - @cube.send("sparql_write_data", DateTime.now, + if @cube + @cube.send("sparql_write_data", DateTime.now, duration_ms: ((Time.now - start)*1000).ceil, type_write: query.class.name.split("::")[-1].downcase) rescue nil end @@ -439,7 +439,7 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - if options[:cache_key] + if options[:cache_key] if options[:cache_key] query_put_cache(options[:cache_key],result_data) end From 3bce2b793190439d3525450b464afb44452e5706 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 9 Sep 2014 18:36:29 -0700 Subject: [PATCH 44/75] NCBO-971 added incremental cache to all counts --- lib/sparql/client.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 56322b25..9989cbef 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -271,7 +271,8 @@ def query(query, options = {}) #TODO less intrusive ? start = Time.now unless query.respond_to?(:options) && query.options[:bypass_cache] - if @redis_cache && (query.instance_of?(SPARQL::Client::Query) || options[:graphs]) + if @redis_cache && (query.instance_of?(SPARQL::Client::Query) || + options[:graphs]) cache_key = nil if options[:graphs] || query.options[:graphs] cache_key = SPARQL::Client::Query.generate_cache_key(query.to_s, @@ -280,6 +281,10 @@ def query(query, options = {}) cache_key = query.cache_key end cache_response = @redis_cache.get(cache_key[:query]) + if options[:reload_cache] and options[:reload_cache] == true + @redis_cache.del(cache_key[:query]) + cache_response = nil + end if cache_response cache_key[:graphs].each do |g| unless @redis_cache.sismember(g,cache_key[:query]) From 97e31e62483f70d61554cbc4832b936f498513b7 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 7 Oct 2014 15:43:23 -0700 Subject: [PATCH 45/75] we are moving to random eviction - no need for expiration --- lib/sparql/client.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 9989cbef..a0c99fb5 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -426,12 +426,12 @@ def cache_invalidate_graph(graphs) end def query_put_cache(keys,entry) - expiration = 1 * 86400 #1 day + #expiration = 1800 #1/2 hour keys[:graphs].each do |g| @redis_cache.sadd(g,keys[:query]) end @redis_cache.set(keys[:query],Marshal.dump(entry)) - @redis_cache.expire(keys[:query],expiration) + #@redis_cache.expire(keys[:query],expiration) end ## From 0b2e1af46149c486916f9c0d0b059e08843eaedb Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 7 Oct 2014 17:05:04 -0700 Subject: [PATCH 46/75] avoid large cache entries in goo --- lib/sparql/client.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index a0c99fb5..d1d1d9df 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -427,10 +427,15 @@ def cache_invalidate_graph(graphs) def query_put_cache(keys,entry) #expiration = 1800 #1/2 hour + data = Marshal.dump(entry) + if data.length > 2e6 #2MB of marshal object + #avoid large entries to go in the cache + return + end keys[:graphs].each do |g| @redis_cache.sadd(g,keys[:query]) end - @redis_cache.set(keys[:query],Marshal.dump(entry)) + @redis_cache.set(keys[:query],data) #@redis_cache.expire(keys[:query],expiration) end From 5f7d64615ac771b6672addbbfb94ef45c1885d41 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Tue, 7 Oct 2014 17:05:04 -0700 Subject: [PATCH 47/75] avoid large cache entries in goo --- lib/sparql/client.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index a0c99fb5..d1d1d9df 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -427,10 +427,15 @@ def cache_invalidate_graph(graphs) def query_put_cache(keys,entry) #expiration = 1800 #1/2 hour + data = Marshal.dump(entry) + if data.length > 2e6 #2MB of marshal object + #avoid large entries to go in the cache + return + end keys[:graphs].each do |g| @redis_cache.sadd(g,keys[:query]) end - @redis_cache.set(keys[:query],Marshal.dump(entry)) + @redis_cache.set(keys[:query],data) #@redis_cache.expire(keys[:query],expiration) end From 9a91b7db5f3fe2befb67a365aca737be7093f0e3 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 7 Nov 2014 16:31:06 -0800 Subject: [PATCH 48/75] debug redis cache --- lib/sparql/client.rb | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index d1d1d9df..5bbb945f 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -280,17 +280,42 @@ def query(query, options = {}) else cache_key = query.cache_key end + if $DEBUG_GOO_CACHE + puts "#CACHE QUERY" + puts query.to_s + puts "#CACHE KEY" + puts cache_key + end cache_response = @redis_cache.get(cache_key[:query]) if options[:reload_cache] and options[:reload_cache] == true @redis_cache.del(cache_key[:query]) cache_response = nil end + if $DEBUG_GOO_CACHE + puts "#CACHE LEVEL 1 #{cache_response != nil}" + if cache_response != nil + x = Marshal.load(cache_response) + x.each do |sol| + puts sol.inspect + end + end + end if cache_response + if $DEBUG_GOO_CACHE + puts "#CACHE LEVEL 2" + end cache_key[:graphs].each do |g| unless @redis_cache.sismember(g,cache_key[:query]) + if $DEBUG_GOO_CACHE + puts "#CACHE LEVEL 2 not in graph set #{g}" + end @redis_cache.del(cache_key[:query]) cache_response = nil break + else + if $DEBUG_GOO_CACHE + puts "#CACHE LEVEL 2 OK graph set #{g}" + end end end if cache_response @@ -312,7 +337,8 @@ def query(query, options = {}) parsed = parse_response(r, options) parse_time = Time.now - pstart if Thread.current[:ncbo_debug] - (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] + (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << + [query_time,parse_time] end if @cube @cube.send("goo_query_hit", DateTime.now, @@ -320,14 +346,6 @@ def query(query, options = {}) query: query.to_s) rescue nil end return parsed - #@op = :query - #case @url - #when RDF::Queryable - # require 'sparql' unless defined?(::SPARQL::Grammar) - # SPARQL.execute(query, @url, options) - #else - # parse_response(response(query, options), options) - #end end ## From a5251d5902f84ab108e37fa17f7374ac12887e3a Mon Sep 17 00:00:00 2001 From: msalvadores Date: Fri, 7 Nov 2014 17:09:44 -0800 Subject: [PATCH 49/75] debug redis cache --- lib/sparql/client.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 5bbb945f..817c8338 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -295,8 +295,12 @@ def query(query, options = {}) puts "#CACHE LEVEL 1 #{cache_response != nil}" if cache_response != nil x = Marshal.load(cache_response) - x.each do |sol| - puts sol.inspect + if x.respond_to?(:each) + x.each do |sol| + puts sol.inspect + end + else + puts x end end end From 2c34b6a1bd93483cd2ff66e5e979b9c0e851a502 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 4 Feb 2015 15:56:40 -0800 Subject: [PATCH 50/75] Revert "debug redis cache" This reverts commit a5251d5902f84ab108e37fa17f7374ac12887e3a. --- lib/sparql/client.rb | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 817c8338..5bbb945f 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -295,12 +295,8 @@ def query(query, options = {}) puts "#CACHE LEVEL 1 #{cache_response != nil}" if cache_response != nil x = Marshal.load(cache_response) - if x.respond_to?(:each) - x.each do |sol| - puts sol.inspect - end - else - puts x + x.each do |sol| + puts sol.inspect end end end From ead7d226ff679969cbbb789c33d5660b45d5a521 Mon Sep 17 00:00:00 2001 From: msalvadores Date: Wed, 4 Feb 2015 15:56:57 -0800 Subject: [PATCH 51/75] Revert "debug redis cache" This reverts commit 9a91b7db5f3fe2befb67a365aca737be7093f0e3. --- lib/sparql/client.rb | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 5bbb945f..d1d1d9df 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -280,42 +280,17 @@ def query(query, options = {}) else cache_key = query.cache_key end - if $DEBUG_GOO_CACHE - puts "#CACHE QUERY" - puts query.to_s - puts "#CACHE KEY" - puts cache_key - end cache_response = @redis_cache.get(cache_key[:query]) if options[:reload_cache] and options[:reload_cache] == true @redis_cache.del(cache_key[:query]) cache_response = nil end - if $DEBUG_GOO_CACHE - puts "#CACHE LEVEL 1 #{cache_response != nil}" - if cache_response != nil - x = Marshal.load(cache_response) - x.each do |sol| - puts sol.inspect - end - end - end if cache_response - if $DEBUG_GOO_CACHE - puts "#CACHE LEVEL 2" - end cache_key[:graphs].each do |g| unless @redis_cache.sismember(g,cache_key[:query]) - if $DEBUG_GOO_CACHE - puts "#CACHE LEVEL 2 not in graph set #{g}" - end @redis_cache.del(cache_key[:query]) cache_response = nil break - else - if $DEBUG_GOO_CACHE - puts "#CACHE LEVEL 2 OK graph set #{g}" - end end end if cache_response @@ -337,8 +312,7 @@ def query(query, options = {}) parsed = parse_response(r, options) parse_time = Time.now - pstart if Thread.current[:ncbo_debug] - (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << - [query_time,parse_time] + (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end if @cube @cube.send("goo_query_hit", DateTime.now, @@ -346,6 +320,14 @@ def query(query, options = {}) query: query.to_s) rescue nil end return parsed + #@op = :query + #case @url + #when RDF::Queryable + # require 'sparql' unless defined?(::SPARQL::Grammar) + # SPARQL.execute(query, @url, options) + #else + # parse_response(response(query, options), options) + #end end ## From 41dab44973c3f086bf335c2513cc208144e99a74 Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 16 Mar 2016 10:55:19 -0700 Subject: [PATCH 52/75] increased cache storage buffer to 10Mb to address NCBO-1673 - can't get READY submissions from API --- .gitignore | 1 + lib/sparql/client.rb | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 825a55b5..c668fa84 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *~ *.gem *.lock +.idea pkg tmp diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index d1d1d9df..66e3ee9d 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -69,6 +69,7 @@ class ServerError < StandardError; end # @option options [Hash] :headers # @option options [Hash] :read_timeout def initialize(url, options = {}, &block) + @logger = options[:logger] ||= Kernel.const_defined?("LOGGER") ? Kernel.const_get("LOGGER") : Logger.new(STDOUT) @redis_cache = nil if options[:redis_cache] @redis_cache = options[:redis_cache] @@ -428,7 +429,7 @@ def cache_invalidate_graph(graphs) def query_put_cache(keys,entry) #expiration = 1800 #1/2 hour data = Marshal.dump(entry) - if data.length > 2e6 #2MB of marshal object + if data.length > 10e6 #10MB of marshal object #avoid large entries to go in the cache return end @@ -450,9 +451,7 @@ def parse_response(response, options = {}) when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) if options[:cache_key] - if options[:cache_key] - query_put_cache(options[:cache_key],result_data) - end + query_put_cache(options[:cache_key],result_data) end return result_data when RESULT_XML From 190fa44c3b63907d11555f5c18a86493cb9722d0 Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 6 May 2016 11:18:30 -0700 Subject: [PATCH 53/75] added a commented out debug statement that allows to track the source of the costly COUNT queries --- lib/sparql/client.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 66e3ee9d..acca5611 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -269,6 +269,11 @@ def nodes # @return [Array] # @see http://www.w3.org/TR/sparql11-protocol/#query-operation def query(query, options = {}) + # pat = /SELECT\s+\(\s*COUNT\(DISTINCT\s+\?id\)\s+AS\s+\?count_var\s*\)\s+FROM\s+\\s+WHERE\s+{\s+\?id\s+a\s+\\s+\.\s+}/ + # if query && (query.to_s =~ pat) != nil + # @logger.info("#{query.to_s}") + # @logger.info(caller.join("\n\t")) + # end #TODO less intrusive ? start = Time.now unless query.respond_to?(:options) && query.options[:bypass_cache] From a0076dedc65d6620b88223c7f69dbe5c5ada3b56 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 7 Jun 2016 13:45:37 -0700 Subject: [PATCH 54/75] implemented NCBO-1757 - Increase the Goo cache limit to 50 Mb --- lib/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index acca5611..c8eb4bb8 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -434,7 +434,7 @@ def cache_invalidate_graph(graphs) def query_put_cache(keys,entry) #expiration = 1800 #1/2 hour data = Marshal.dump(entry) - if data.length > 10e6 #10MB of marshal object + if data.length > 50e6 #50MB of marshal object #avoid large entries to go in the cache return end From 7a2e3a2844bc9a8984134fa7b2e2ecb9c55856ae Mon Sep 17 00:00:00 2001 From: Jennifer Vendetti Date: Mon, 10 Oct 2016 13:05:02 -0700 Subject: [PATCH 55/75] Set net-http-persistent gem at 2.9.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anything higher than this version results in failures in the ontologies_linked_data project unit tests: “EXITING: Cannot connect to triplestore and/or search server: wrong number of arguments (2 for 0)”. --- .gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gemspec b/.gemspec index 75e19380..803e4ca7 100755 --- a/.gemspec +++ b/.gemspec @@ -29,7 +29,7 @@ Gem::Specification.new do |gem| gem.required_ruby_version = '>= 1.8.1' gem.requirements = [] gem.add_runtime_dependency 'rdf', '>= 1.0' - gem.add_runtime_dependency 'net-http-persistent', '>= 1.4' + gem.add_runtime_dependency 'net-http-persistent', '2.9.4' gem.add_runtime_dependency 'json_pure', '>= 1.4' gem.add_development_dependency 'sparql', '>= 1.0' unless RUBY_VERSION < "1.9" gem.add_development_dependency 'rdf-spec', '>= 1.0' From 94c792d7fb73bec29bb08e92fd91de30591a54a3 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 15 Nov 2016 11:19:54 -0800 Subject: [PATCH 56/75] AllegroGraph testing --- lib/sparql/client.rb | 35 +++++++++++++++++++++++++++++++++-- lib/sparql/client/query.rb | 16 ++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index c8eb4bb8..0bc7563d 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -455,9 +455,31 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - if options[:cache_key] + + + + if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" + result_data = [] + end + + + + if options[:cache_key] query_put_cache(options[:cache_key],result_data) end + + + + # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" + # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\",\"c\"]},\n \"results\": {\n \"bindings\":[]\n }}\n" + + + + # if result_data.length == 1 && !result_data[0][:c].nil? && result_data[0][:c].is_a?(RDF::Literal::Integer) && result_data[0][:c].value == "0" + # result_data = [] + # end + + return result_data when RESULT_XML #self.class.parse_xml_nokiri(response.body, nodes) @@ -728,11 +750,20 @@ def request(query, headers = {}, op = :query, query_options = nil, &block) method = (self.options[:method] || DEFAULT_METHOD).to_sym request = send("make_#{method}_request", query,op , headers, query_options) - request.basic_auth(url.user, url.password) if url.user && !url.user.empty? + # request.basic_auth(url.user, url.password) if url.user && !url.user.empty? @http.open_timeout = @http.read_timeout @http.idle_timeout = nil + + response = @http.request(url, request) + + + + # binding.pry if query == " SELECT ?g (count(?s1) as ?c)\n WHERE {\n {\n GRAPH {\n ?s1 ?o .\n }\n GRAPH ?g {\n ?s2 ?o .\n }\n}\n\n FILTER (?s1 != ?s2)\nFILTER (!STRSTARTS(str(?g),'http://data.bioontology.org/ontologies/MAPPING_TEST1'))\n } GROUP BY ?g\n" + + + if block_given? block.call(response) else diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 006eb45f..c7edaf8f 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -420,6 +420,22 @@ def to_s options[:optionals].each do |patterns| buffer << 'OPTIONAL {' buffer += serialize_patterns(patterns) + + + + + + + buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } + + + + + + + + + buffer << '}' end end From 2586eaa5bae4e41c24e7bdc10c985244df2521e7 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 10 Jan 2017 13:30:45 -0800 Subject: [PATCH 57/75] fixed gemspec in Gemfile --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 8e9d2919..cafa8411 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,6 @@ source "https://rubygems.org" -gemspec :name => "" +gemspec gem "jruby-openssl", :platforms => :jruby gem 'cube-ruby', require: "cube" From af2ab58f41c70c109f45b376c1a3a6a55b126453 Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 25 May 2017 17:34:45 -0700 Subject: [PATCH 58/75] Gemfile fix --- Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 8e9d2919..cafa8411 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,6 @@ source "https://rubygems.org" -gemspec :name => "" +gemspec gem "jruby-openssl", :platforms => :jruby gem 'cube-ruby', require: "cube" From 657883c87cb64f4a8ecd04e72f3b1d2a97c92d96 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 8 Aug 2017 12:56:33 -0700 Subject: [PATCH 59/75] fixed invalid constructs in the gemspec file, which caused the latest bundler (v1.15.3) to break --- .gemspec | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.gemspec b/.gemspec index 803e4ca7..e9693679 100755 --- a/.gemspec +++ b/.gemspec @@ -2,9 +2,8 @@ # -*- encoding: utf-8 -*- Gem::Specification.new do |gem| - gem.version = File.read('VERSION').chomp - gem.date = File.mtime('VERSION').strftime('%Y-%m-%d') - + gem.version = '1.0.1' + gem.date = '2012-11-21' gem.name = 'sparql-client' gem.homepage = 'http://ruby-rdf.github.com/sparql-client/' gem.license = 'Public Domain' if gem.respond_to?(:license=) From f7b6888b923f312a1190957c6f96a2ba0ffd2e1c Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 19 Dec 2017 12:13:30 -0800 Subject: [PATCH 60/75] a commented out code for better compatibility with Allegrograph --- lib/sparql/client/query.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 006eb45f..9642d3e1 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -420,6 +420,19 @@ def to_s options[:optionals].each do |patterns| buffer << 'OPTIONAL {' buffer += serialize_patterns(patterns) + + + + + + + # buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } + + + + + + buffer << '}' end end From 65a8de3940191fe9d248fa61ee59588acc898e91 Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 6 Jun 2018 16:52:11 -0700 Subject: [PATCH 61/75] AG testing --- lib/sparql/client/query.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 9642d3e1..aa094006 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -426,7 +426,7 @@ def to_s - # buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } + buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } From 614bf5752409a6604e53b184723fc6f9cff2eb15 Mon Sep 17 00:00:00 2001 From: mdorf Date: Tue, 25 Sep 2018 15:17:38 -0700 Subject: [PATCH 62/75] added query debugging statements --- lib/sparql/client.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 0bc7563d..eed64220 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -318,13 +318,17 @@ def query(query, options = {}) parsed = parse_response(r, options) parse_time = Time.now - pstart if Thread.current[:ncbo_debug] + @logger.info("************************* Query *************************") + @logger.info(query.to_s) + @logger.info("************************ Duration ***********************") + @logger.info("#{Time.now - start} sec.\n") (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, - duration_ms: ((Time.now - start)*1000).ceil, - query: query.to_s) rescue nil - end + # if @cube + # @cube.send("goo_query_hit", DateTime.now, + # duration_ms: ((Time.now - start)*1000).ceil, + # query: query.to_s) rescue nil + # end return parsed #@op = :query #case @url From 523b58e676dae8d6517019ce3cc3784b06314b8d Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 26 Sep 2018 15:34:27 -0700 Subject: [PATCH 63/75] query output in debugging mode --- lib/sparql/client.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index eed64220..a561e41b 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -318,8 +318,7 @@ def query(query, options = {}) parsed = parse_response(r, options) parse_time = Time.now - pstart if Thread.current[:ncbo_debug] - @logger.info("************************* Query *************************") - @logger.info(query.to_s) + @logger.info("************************* Query *************************\n#{query.to_s}") @logger.info("************************ Duration ***********************") @logger.info("#{Time.now - start} sec.\n") (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] From 3eb2b95f9311b551f1575f0dbf29a5ee1437bf9f Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 26 Sep 2018 15:35:53 -0700 Subject: [PATCH 64/75] query output in debugging mode --- lib/sparql/client.rb | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index c8eb4bb8..df50c6c1 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -318,13 +318,16 @@ def query(query, options = {}) parsed = parse_response(r, options) parse_time = Time.now - pstart if Thread.current[:ncbo_debug] + @logger.info("************************* Query *************************\n#{query.to_s}") + @logger.info("************************ Duration ***********************") + @logger.info("#{Time.now - start} sec.\n") (Thread.current[:ncbo_debug][:sparql_queries] ||= []) << [query_time,parse_time] end - if @cube - @cube.send("goo_query_hit", DateTime.now, - duration_ms: ((Time.now - start)*1000).ceil, - query: query.to_s) rescue nil - end + # if @cube + # @cube.send("goo_query_hit", DateTime.now, + # duration_ms: ((Time.now - start)*1000).ceil, + # query: query.to_s) rescue nil + # end return parsed #@op = :query #case @url From 3b7e2b14c83a6ffde90c6f7e78c3b5767c2e3a4c Mon Sep 17 00:00:00 2001 From: mdorf Date: Mon, 28 Jan 2019 10:59:00 -0800 Subject: [PATCH 65/75] Removed deprecated config params from gemspec file --- .gemspec | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gemspec b/.gemspec index e9693679..5e0b576e 100755 --- a/.gemspec +++ b/.gemspec @@ -19,11 +19,9 @@ Gem::Specification.new do |gem| gem.files = %w(AUTHORS CREDITS README UNLICENSE VERSION) + Dir.glob('lib/**/*.rb') gem.bindir = %q(bin) gem.executables = %w() - gem.default_executable = gem.executables.first gem.require_paths = %w(lib) gem.extensions = %w() gem.test_files = %w() - gem.has_rdoc = false gem.required_ruby_version = '>= 1.8.1' gem.requirements = [] From 78aa6933ea1cfb45e6d6adc10ecce33be35d0370 Mon Sep 17 00:00:00 2001 From: mdorf Date: Mon, 30 Mar 2020 11:31:51 -0700 Subject: [PATCH 66/75] added additional comments for changes related to AG --- lib/sparql/client.rb | 44 +++++++++++++++++--------------------- lib/sparql/client/query.rb | 20 +++++++---------- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index a561e41b..6a4fc94d 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -458,31 +458,36 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - - + # This is a special case for AG, in which results_data gets set to nil when an Integer value is 0 + # This is evident when running the test_mappings of ontologies_linked_data. The error is included below: + # + # Problem with custom id generation: comparison of String with nil failed (Goo::Base::IDGenerationError) + # /dev/ncbo/goo/lib/goo/base/resource.rb:92:in `rescue in id' + # /dev/ncbo/goo/lib/goo/base/resource.rb:89:in `id' + # /dev/ncbo/goo/lib/goo/sparql/triples.rb:50:in `model_update_triples' + # /dev/ncbo/goo/lib/goo/base/resource.rb:336:in `save' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/models/base.rb:13:in `save' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:751:in `block (2 levels) in create_mapping_counts' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:737:in `each_key' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:737:in `block in create_mapping_counts' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:720:in `each' + # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:720:in `create_mapping_counts' + # /dev/ncbo/ontologies_linked_data/test/models/test_mappings.rb:40:in `ontologies_parse' + # /dev/ncbo/ontologies_linked_data/test/models/test_mappings.rb:15:in `before_suite' + # /dev/ncbo/ontologies_linked_data/test/test_case.rb:70:in `_run_suite' if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" + # if result_data.length == 1 && !result_data[0][:c].nil? && result_data[0][:c].is_a?(RDF::Literal::Integer) && result_data[0][:c].value == "0" + # if response.body.to_s.include? "\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"" result_data = [] end - - if options[:cache_key] query_put_cache(options[:cache_key],result_data) end - - # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\",\"c\"]},\n \"results\": {\n \"bindings\":[]\n }}\n" - - - - # if result_data.length == 1 && !result_data[0][:c].nil? && result_data[0][:c].is_a?(RDF::Literal::Integer) && result_data[0][:c].value == "0" - # result_data = [] - # end - - return result_data when RESULT_XML #self.class.parse_xml_nokiri(response.body, nodes) @@ -753,20 +758,11 @@ def request(query, headers = {}, op = :query, query_options = nil, &block) method = (self.options[:method] || DEFAULT_METHOD).to_sym request = send("make_#{method}_request", query,op , headers, query_options) - # request.basic_auth(url.user, url.password) if url.user && !url.user.empty? + request.basic_auth(url.user, url.password) if url.user && !url.user.empty? @http.open_timeout = @http.read_timeout @http.idle_timeout = nil - - response = @http.request(url, request) - - - - # binding.pry if query == " SELECT ?g (count(?s1) as ?c)\n WHERE {\n {\n GRAPH {\n ?s1 ?o .\n }\n GRAPH ?g {\n ?s2 ?o .\n }\n}\n\n FILTER (?s1 != ?s2)\nFILTER (!STRSTARTS(str(?g),'http://data.bioontology.org/ontologies/MAPPING_TEST1'))\n } GROUP BY ?g\n" - - - if block_given? block.call(response) else diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index aa094006..5f1a7fe7 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -420,19 +420,15 @@ def to_s options[:optionals].each do |patterns| buffer << 'OPTIONAL {' buffer += serialize_patterns(patterns) - - - - - - + # This is added to move the filters into the OPTIONAL clauses. Based by this comment from AG: + # Because the FILTERs are _outside_ the OPTIONALs, they are applied to _every_ + # row returned. i.e., only rows where ?rewrite0 is in its list _and_ ?rewrite1 + # is in its list will be returned. I.e., the query will return NO results where + # ?rewrite0 or ?rewrite1 is NULL. + # + # All you need to do is to make sure that the FILTERS are applied only _inside_ + # each OPTIONAL. For example, this query will do what you want: buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } - - - - - - buffer << '}' end end From 60fc3035ff58af370000bbe5f6c467cb04e2c843 Mon Sep 17 00:00:00 2001 From: mdorf Date: Mon, 30 Mar 2020 12:43:16 -0700 Subject: [PATCH 67/75] code cleanup and additional comments based on previous conversations with AG --- lib/sparql/client/query.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 5f1a7fe7..34d200ef 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -421,13 +421,6 @@ def to_s buffer << 'OPTIONAL {' buffer += serialize_patterns(patterns) # This is added to move the filters into the OPTIONAL clauses. Based by this comment from AG: - # Because the FILTERs are _outside_ the OPTIONALs, they are applied to _every_ - # row returned. i.e., only rows where ?rewrite0 is in its list _and_ ?rewrite1 - # is in its list will be returned. I.e., the query will return NO results where - # ?rewrite0 or ?rewrite1 is NULL. - # - # All you need to do is to make sure that the FILTERS are applied only _inside_ - # each OPTIONAL. For example, this query will do what you want: buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } buffer << '}' end From 2025f80c5520b09215a9ed543ae32b71a98e261d Mon Sep 17 00:00:00 2001 From: mdorf Date: Wed, 1 Apr 2020 12:16:26 -0700 Subject: [PATCH 68/75] removed redundant comment --- lib/sparql/client.rb | 3 --- lib/sparql/client/query.rb | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 6a4fc94d..610f90eb 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -485,9 +485,6 @@ def parse_response(response, options = {}) if options[:cache_key] query_put_cache(options[:cache_key],result_data) end - - # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" - # binding.pry if response.body == "{\"head\":{\"vars\":[\"g\",\"c\"]},\n \"results\": {\n \"bindings\":[]\n }}\n" return result_data when RESULT_XML #self.class.parse_xml_nokiri(response.body, nodes) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 34d200ef..f819459c 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -420,7 +420,7 @@ def to_s options[:optionals].each do |patterns| buffer << 'OPTIONAL {' buffer += serialize_patterns(patterns) - # This is added to move the filters into the OPTIONAL clauses. Based by this comment from AG: + # This is added to move the filters into the OPTIONAL clauses for AG compatibility buffer += patterns.map { |pattern| "FILTER(#{pattern.options[:filter]})" if pattern.options && pattern.options[:filter] } buffer << '}' end From 33346e8b7cac07a5cec7866293b0700ce114450d Mon Sep 17 00:00:00 2001 From: Jennifer Vendetti Date: Wed, 10 Jun 2020 11:34:02 -0700 Subject: [PATCH 69/75] Ignore rbenv files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index c668fa84..ffcf86e8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ pkg tmp *.swp + +# rbenv +.ruby-version From 2ab48e2b7b2aca5fd7ab99b9de36151bd7510de0 Mon Sep 17 00:00:00 2001 From: mdorf Date: Mon, 22 Jun 2020 10:23:47 -0700 Subject: [PATCH 70/75] removed the fix for ncbo/goo#104 --- lib/sparql/client.rb | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 610f90eb..3ea76642 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -458,29 +458,6 @@ def parse_response(response, options = {}) response.body == 'true' when RESULT_JSON result_data = self.class.parse_json_bindings(response.body, nodes) - # This is a special case for AG, in which results_data gets set to nil when an Integer value is 0 - # This is evident when running the test_mappings of ontologies_linked_data. The error is included below: - # - # Problem with custom id generation: comparison of String with nil failed (Goo::Base::IDGenerationError) - # /dev/ncbo/goo/lib/goo/base/resource.rb:92:in `rescue in id' - # /dev/ncbo/goo/lib/goo/base/resource.rb:89:in `id' - # /dev/ncbo/goo/lib/goo/sparql/triples.rb:50:in `model_update_triples' - # /dev/ncbo/goo/lib/goo/base/resource.rb:336:in `save' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/models/base.rb:13:in `save' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:751:in `block (2 levels) in create_mapping_counts' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:737:in `each_key' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:737:in `block in create_mapping_counts' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:720:in `each' - # /dev/ncbo/ontologies_linked_data/lib/ontologies_linked_data/mappings/mappings.rb:720:in `create_mapping_counts' - # /dev/ncbo/ontologies_linked_data/test/models/test_mappings.rb:40:in `ontologies_parse' - # /dev/ncbo/ontologies_linked_data/test/models/test_mappings.rb:15:in `before_suite' - # /dev/ncbo/ontologies_linked_data/test/test_case.rb:70:in `_run_suite' - - if response.body == "{\"head\":{\"vars\":[\"g\", \"c\"]},\"results\":{\"bindings\":[\n {\"c\":{\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"}}]}}" - # if result_data.length == 1 && !result_data[0][:c].nil? && result_data[0][:c].is_a?(RDF::Literal::Integer) && result_data[0][:c].value == "0" - # if response.body.to_s.include? "\"type\":\"literal\",\"datatype\":\"http://www.w3.org/2001/XMLSchema#integer\", \"value\":\"0\"" - result_data = [] - end if options[:cache_key] query_put_cache(options[:cache_key],result_data) From a1e6beaf6790df68921cbed9bd1719299b9add3e Mon Sep 17 00:00:00 2001 From: mdorf Date: Thu, 9 Jul 2020 08:22:56 -0700 Subject: [PATCH 71/75] fixed #3 - deprecation warnings --- lib/sparql/client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index 3ea76642..bc5bc15a 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -416,7 +416,7 @@ def cache_invalidate_graph(graphs) begin graph = graph.to_s graph = "sparql:graph:#{graph}" unless graph.start_with?("sparql:graph:") - if @redis_cache.exists(graph) + if @redis_cache.exists?(graph) begin @redis_cache.del(graph) rescue => exception From d8200c767956797970f771962df8dc60e048719d Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 19 Mar 2021 18:06:22 -0700 Subject: [PATCH 72/75] added some commented code for debugging queries --- lib/sparql/client.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/sparql/client.rb b/lib/sparql/client.rb index bc5bc15a..cfbeb2e7 100644 --- a/lib/sparql/client.rb +++ b/lib/sparql/client.rb @@ -312,6 +312,11 @@ def query(query, options = {}) end @op = :query qstart = Time.now + # pat = /submissionStatus/ + # if query && (query.to_s =~ pat) != nil + # @logger.info("#{query.to_s}") + # @logger.info(caller.join("\n\t")) + # end r = response(query, options) query_time = Time.now - qstart pstart = Time.now From 27517e222f4ad4565c7f98e4a13820a33ebaebf2 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 20 Jul 2022 16:03:43 +0200 Subject: [PATCH 73/75] add optional_union_with_bind_as --- lib/sparql/client/query.rb | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index f819459c..647c50f7 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -262,6 +262,14 @@ def union_with_bind_as(*pattern_list) self end + def optional_union_with_bind_as(*pattern_list) + options[:optional_unions_with_bind] ||= [] + + pattern_list.each do |patterns,bind,filter| + options[:optional_unions_with_bind] << [build_patterns(patterns), bind, filter] + end + self + end def cache_key return nil if options[:from].nil? || options[:from].empty? @@ -394,10 +402,38 @@ def to_s buffer << "{ #{sq.to_s} } ." end + def add_union_with_bind(patterns) + include_union = nil + buffer = [] + patterns.each do |pattern, options| + buffer << include_union if include_union + buffer << '{' + buffer += serialize_patterns(pattern) + if options[:filters] + buffer += options[:filters].map do |filter| + str = filter[:values].map do |val| + "?#{filter[:predicate]} = <#{val}>" + end + "FILTER(#{str.join(' || ')}) " + end + end + + if options[:binds] + buffer += options[:binds].map { |bind| "BIND( \"#{bind[:value]}\" as ?#{bind[:as]})" } + end + + + buffer << '}' + include_union = "UNION " + end + buffer + end + buffer += serialize_patterns(patterns) if options[:unions] include_union = nil options[:unions].each do |union_block| + buffer << include_union if include_union buffer << '{' buffer += serialize_patterns(union_block) From 05a31f305446e1f2a9c39f08545fec76019e667e Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Wed, 20 Jul 2022 16:04:56 +0200 Subject: [PATCH 74/75] update :unions_with_bind and add :optional_unions_with_bind --- lib/sparql/client/query.rb | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index 647c50f7..c155871f 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -442,16 +442,15 @@ def add_union_with_bind(patterns) end end if options[:unions_with_bind] - include_union = nil - options[:unions_with_bind].each do |union_block, value_bind, var_bind| - buffer << include_union if include_union - buffer << '{' - buffer += serialize_patterns(union_block) - buffer << "BIND (\"#{value_bind}\" as ?#{var_bind.to_s})" - buffer << '}' - include_union = "UNION " - end + buffer << add_union_with_bind(options[:unions_with_bind]) end + + if options[:optional_unions_with_bind] && !options[:optional_unions_with_bind].empty? + buffer << 'OPTIONAL {' + buffer << add_union_with_bind(options[:optional_unions_with_bind]) + buffer << '}' + end + if options[:optionals] options[:optionals].each do |patterns| buffer << 'OPTIONAL {' From 180c818f7715baac64b2699bb452ef5c756f62c5 Mon Sep 17 00:00:00 2001 From: Syphax bouazzouni Date: Thu, 29 Feb 2024 01:48:39 +0100 Subject: [PATCH 75/75] fix: enforce string literals serialization to add xsd:string if given --- lib/sparql/client/query.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/sparql/client/query.rb b/lib/sparql/client/query.rb index c155871f..4210ba5d 100644 --- a/lib/sparql/client/query.rb +++ b/lib/sparql/client/query.rb @@ -496,7 +496,11 @@ def serialize_patterns(patterns) if i == 1 && v.equal?(rdf_type) 'a' # abbreviate RDF.type in the predicate position per SPARQL grammar else - SPARQL::Client.serialize_value(v) + sv = SPARQL::Client.serialize_value(v) + if v.is_a?(RDF::Literal) && v.original_datatype&.to_s.eql?(RDF::XSD.string.to_s) + sv = "#{sv}^^" # 4store and Virtuoso need explicit string type + end + sv end end serialized_pattern.join(' ') + ' .'