diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/Gemfile b/Gemfile index 2a534f54..e9db0eca 100644 --- a/Gemfile +++ b/Gemfile @@ -11,4 +11,7 @@ gemspec # your gem to rubygems.org. # To use a debugger -# gem 'byebug', group: [:development, :test] +gem 'byebug', group: [:development, :test] +gem "rspec-rails" +gem "factory_bot_rails" +gem "oai" diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 00000000..e6b39818 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,167 @@ +PATH + remote: . + specs: + bulkrax (0.1.0) + iso8601 (~> 0.9.0) + language_list (~> 1.2, >= 1.2.1) + libxml-ruby (~> 3.1.0) + oai (~> 0.4) + rails (~> 5.1.6) + simple_form (~> 3.2, <= 3.5.0) + +GEM + remote: https://rubygems.org/ + specs: + actioncable (5.1.6) + actionpack (= 5.1.6) + nio4r (~> 2.0) + websocket-driver (~> 0.6.1) + actionmailer (5.1.6) + actionpack (= 5.1.6) + actionview (= 5.1.6) + activejob (= 5.1.6) + mail (~> 2.5, >= 2.5.4) + rails-dom-testing (~> 2.0) + actionpack (5.1.6) + actionview (= 5.1.6) + activesupport (= 5.1.6) + rack (~> 2.0) + rack-test (>= 0.6.3) + rails-dom-testing (~> 2.0) + rails-html-sanitizer (~> 1.0, >= 1.0.2) + actionview (5.1.6) + activesupport (= 5.1.6) + builder (~> 3.1) + erubi (~> 1.4) + rails-dom-testing (~> 2.0) + rails-html-sanitizer (~> 1.0, >= 1.0.3) + activejob (5.1.6) + activesupport (= 5.1.6) + globalid (>= 0.3.6) + activemodel (5.1.6) + activesupport (= 5.1.6) + activerecord (5.1.6) + activemodel (= 5.1.6) + activesupport (= 5.1.6) + arel (~> 8.0) + activesupport (5.1.6) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + arel (8.0.0) + builder (3.2.3) + byebug (10.0.2) + concurrent-ruby (1.0.5) + crass (1.0.4) + diff-lcs (1.3) + erubi (1.7.1) + factory_bot (4.11.1) + activesupport (>= 3.0.0) + factory_bot_rails (4.11.1) + factory_bot (~> 4.11.1) + railties (>= 3.0.0) + faraday (0.15.3) + 
multipart-post (>= 1.2, < 3) + faraday_middleware (0.12.2) + faraday (>= 0.7.4, < 1.0) + globalid (0.4.1) + activesupport (>= 4.2.0) + i18n (1.1.0) + concurrent-ruby (~> 1.0) + iso8601 (0.9.1) + language_list (1.2.1) + libxml-ruby (3.1.0) + loofah (2.2.2) + crass (~> 1.0.2) + nokogiri (>= 1.5.9) + mail (2.7.1) + mini_mime (>= 0.1.1) + method_source (0.9.0) + mini_mime (1.0.1) + mini_portile2 (2.3.0) + minitest (5.11.3) + multipart-post (2.0.0) + nio4r (2.3.1) + nokogiri (1.8.5) + mini_portile2 (~> 2.3.0) + oai (0.4.0) + builder (>= 3.1.0) + faraday + faraday_middleware + rack (2.0.5) + rack-test (1.1.0) + rack (>= 1.0, < 3) + rails (5.1.6) + actioncable (= 5.1.6) + actionmailer (= 5.1.6) + actionpack (= 5.1.6) + actionview (= 5.1.6) + activejob (= 5.1.6) + activemodel (= 5.1.6) + activerecord (= 5.1.6) + activesupport (= 5.1.6) + bundler (>= 1.3.0) + railties (= 5.1.6) + sprockets-rails (>= 2.0.0) + rails-dom-testing (2.0.3) + activesupport (>= 4.2.0) + nokogiri (>= 1.6) + rails-html-sanitizer (1.0.4) + loofah (~> 2.2, >= 2.2.2) + railties (5.1.6) + actionpack (= 5.1.6) + activesupport (= 5.1.6) + method_source + rake (>= 0.8.7) + thor (>= 0.18.1, < 2.0) + rake (12.3.1) + rspec-core (3.8.0) + rspec-support (~> 3.8.0) + rspec-expectations (3.8.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.8.0) + rspec-mocks (3.8.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.8.0) + rspec-rails (3.8.0) + actionpack (>= 3.0) + activesupport (>= 3.0) + railties (>= 3.0) + rspec-core (~> 3.8.0) + rspec-expectations (~> 3.8.0) + rspec-mocks (~> 3.8.0) + rspec-support (~> 3.8.0) + rspec-support (3.8.0) + simple_form (3.5.0) + actionpack (> 4, < 5.2) + activemodel (> 4, < 5.2) + sprockets (3.7.2) + concurrent-ruby (~> 1.0) + rack (> 1, < 3) + sprockets-rails (3.2.1) + actionpack (>= 4.0) + activesupport (>= 4.0) + sprockets (>= 3.0.0) + sqlite3 (1.3.13) + thor (0.20.0) + thread_safe (0.3.6) + tzinfo (1.2.5) + thread_safe (~> 0.1) + websocket-driver (0.6.5) + 
websocket-extensions (>= 0.1.0) + websocket-extensions (0.1.3) + +PLATFORMS + ruby + +DEPENDENCIES + bulkrax! + byebug + factory_bot_rails + oai + rspec-rails + sqlite3 + +BUNDLED WITH + 1.17.2 diff --git a/app/assets/javascripts/bulkrax/application.js b/app/assets/javascripts/bulkrax/application.js index e54c6461..dd80ecef 100644 --- a/app/assets/javascripts/bulkrax/application.js +++ b/app/assets/javascripts/bulkrax/application.js @@ -10,4 +10,5 @@ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details // about supported directives. // + //= require_tree . diff --git a/app/assets/javascripts/bulkrax/importers.js b/app/assets/javascripts/bulkrax/importers.js new file mode 100644 index 00000000..e9838e1b --- /dev/null +++ b/app/assets/javascripts/bulkrax/importers.js @@ -0,0 +1,66 @@ +// Place all the behaviors and hooks related to the matching controller here. +// All this logic will automatically be available in application.js. +$(document).ready(function() { + var refresh_button = $('.refresh-set-source') + var base_url = $('#importer_parser_fields_base_url') + var external_set_select = $("#importer_parser_fields_set") + var initial_base_url = base_url.val() + + // handle refreshing/loading of external setes via button click + $('body').on('click', '.refresh-set-source', function(e) { + e.preventDefault() + + handleSourceLoad(refresh_button, base_url, external_set_select) + }) + + // handle refreshing/loading of external sets via blur event for the base_url field + $('body').on('blur', '#importer_parser_fields_base_url', function(e) { + e.preventDefault() + + // ensure we don't make another query if the value is the same -- this can be forced by clicking the refresh button + if (initial_base_url != base_url.val()) { + handleSourceLoad(refresh_button, base_url, external_set_select) + initial_base_url = base_url.val() + } + }) +}); + +function handleSourceLoad(refresh_button, base_url, external_set_select) { + if 
(base_url.val() == "") { // ignore empty base_url value + return + } + + var initial_button_text = refresh_button.html() + + refresh_button.html('Refreshing...') + refresh_button.attr('disabled', true) + + $.post('/importers/external_sets', { + base_url: base_url.val(), + }, function(res) { + if (!res.error) { + genExternalSetOptions(external_set_select, res.sets) // sets is [[name, spec]...] + } else { + setError(external_set_select, res.error) + } + + refresh_button.html(initial_button_text) + refresh_button.attr('disabled', false) + }) +} + +function genExternalSetOptions(selector, sets) { + out = '' + + out += sets.map(function(set) { + return '' + }) + + selector.html(out) + selector.attr('disabled', false) +} + +function setError(selector, error) { + selector.html('') + selector.attr('disabled', true) +} diff --git a/app/controllers/bulkrax/importers_controller.rb b/app/controllers/bulkrax/importers_controller.rb new file mode 100644 index 00000000..61702431 --- /dev/null +++ b/app/controllers/bulkrax/importers_controller.rb @@ -0,0 +1,108 @@ +require_dependency "bulkrax/application_controller" +require_dependency "oai" + +module Bulkrax + class ImportersController < ApplicationController + include Hyrax::ThemedLayoutController + + before_action :set_importer, only: [:show, :edit, :update, :destroy] + with_themed_layout 'dashboard' + + # GET /importers + def index + add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path + add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path + add_breadcrumb 'Importers', bulkrax.importers_path + @importers = Importer.all + end + + # GET /importers/1 + def show + end + + # GET /importers/new + def new + add_breadcrumb t(:'hyrax.controls.home'), main_app.root_path + add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path + add_breadcrumb 'Importers', bulkrax.importers_path + @importer = Importer.new + end + + # GET /importers/1/edit + def edit + add_breadcrumb 
t(:'hyrax.controls.home'), main_app.root_path + add_breadcrumb t(:'hyrax.dashboard.breadcrumbs.admin'), hyrax.dashboard_path + add_breadcrumb 'Importers', bulkrax.importers_path + end + + # POST /importers + def create + @importer = Importer.new(importer_params) + + if @importer.save + redirect_to @importer, notice: 'Importer was successfully created.' + else + render :new + end + end + + # PATCH/PUT /importers/1 + def update + if @importer.update(importer_params) + redirect_to @importer, notice: 'Importer was successfully updated.' + else + render :edit + end + end + + # DELETE /importers/1 + def destroy + @importer.destroy + redirect_to importers_url, notice: 'Importer was successfully destroyed.' + end + + def external_sets + if list_external_sets + render json: { base_url: params[:base_url], sets: @sets } + else + render json: { base_url: params[:base_url], error: "unable to pull data from #{params[:base_url]}" } + end + end + + private + # Use callbacks to share common setup or constraints between actions. + def set_importer + @importer = Importer.find(params[:id]) + end + + # Only allow a trusted parameter "white list" through. + def importer_params + params.require(:importer).permit(:name, :admin_set_id, :user_id, :frequency, :parser_klass, :limit, parser_fields: {}, field_mapping: {}) + end + + def list_external_sets + url = params[:base_url] || (@harvester ? @harvester.base_url : nil) + setup_client(url) if url.present? + + @sets = [['All', 'all']] + + begin + @client.list_sets.each do |s| + @sets << [s.name, s.spec] + end + rescue + return false + end + + @sets + end + + def setup_client(url) + return false if url.nil? 
+ + headers = { from: 'server@atla.com' } + + @client ||= OAI::Client.new(url, headers: headers, parser: 'libxml', metadata_prefix: 'oai_dc') + end + end +end diff --git a/app/helpers/bulkrax/application_helper.rb b/app/helpers/bulkrax/application_helper.rb index 1a53cddf..2a798d27 100644 --- a/app/helpers/bulkrax/application_helper.rb +++ b/app/helpers/bulkrax/application_helper.rb @@ -1,4 +1,6 @@ module Bulkrax module ApplicationHelper + include Hyrax::HyraxHelperBehavior + end end diff --git a/app/helpers/bulkrax/importers_helper.rb b/app/helpers/bulkrax/importers_helper.rb new file mode 100644 index 00000000..521da68a --- /dev/null +++ b/app/helpers/bulkrax/importers_helper.rb @@ -0,0 +1,11 @@ +module Bulkrax + module ImportersHelper + # borrowd from batch-importer https://github.com/samvera-labs/hyrax-batch_ingest/blob/master/app/controllers/hyrax/batch_ingest/batches_controller.rb + def available_admin_sets + # Restrict available_admin_sets to only those current user can desposit to. 
+ @available_admin_sets ||= Hyrax::Collections::PermissionsService.source_ids_for_deposit(ability: current_ability, source_type: 'admin_set').map do |admin_set_id| + [AdminSet.find(admin_set_id).title.first, admin_set_id] + end + end + end +end diff --git a/app/jobs/bulkrax/import_work_job.rb b/app/jobs/bulkrax/import_work_job.rb new file mode 100644 index 00000000..204e1f21 --- /dev/null +++ b/app/jobs/bulkrax/import_work_job.rb @@ -0,0 +1,10 @@ +module Bulkrax + class ImportWorkJob < ApplicationJob + queue_as :import + + def perform(*args) + @importer = Importer.find(args[0]) + @importer.import_work(args[1]) + end + end +end diff --git a/app/jobs/bulkrax/importer_job.rb b/app/jobs/bulkrax/importer_job.rb new file mode 100644 index 00000000..d2695674 --- /dev/null +++ b/app/jobs/bulkrax/importer_job.rb @@ -0,0 +1,17 @@ +module Bulkrax + class ImporterJob < ApplicationJob + queue_as :import + + def perform(importer_id, only_updates_since_last_import=false) + start = Time.current + importer = Importer.find(id) + + importer.import_works + importer.last_imported_at = start + if importer.schedulable? 
+ ImporterJob.set(wait_until: importer.next_import_at).perform_later(importer.id, true) + end + + end + end +end diff --git a/app/models/bulkrax/entries/application_entry.rb b/app/models/bulkrax/entries/application_entry.rb new file mode 100644 index 00000000..4884b696 --- /dev/null +++ b/app/models/bulkrax/entries/application_entry.rb @@ -0,0 +1,12 @@ +module Bulkrax + module Entries + class ApplicationEntry + + def build + # attributes, files_dir = nil, files = [], user = nil + Bulkrax::Factories::ApplicationFactory.for(entry_class).new(all_attrs, nil, [], user).run + end + + end + end +end diff --git a/app/models/bulkrax/entries/oai_entry.rb b/app/models/bulkrax/entries/oai_entry.rb new file mode 100644 index 00000000..9bcd0178 --- /dev/null +++ b/app/models/bulkrax/entries/oai_entry.rb @@ -0,0 +1,46 @@ +module Bulkrax + module Entries + class OaiEntry < ApplicationEntry + attr_accessor :parser, :importer, :raw_record, :parsed_record, :all_attrs, :identifier + + delegate :client, + :mapping_class, + :collection_name, + :user, + to: :parser + + def initialize(parser, identifier) + @parser= parser + @identifier = identifier + end + + def entry_class + 'ETD' + end + + def raw_record + @raw_record ||= client.get_record({identifier: identifier}) + end + + def mapping + @mapping ||= mapping_class.new( + raw_record, + parser.parser_fields['rights_statement'], + parser.parser_fields['institution_name'], + parser.parser_fields['thumbnail_url'], + collection_name == "all" + ) + end + + def all_attrs + return @all_attrs if @all_attrs + @all_attrs ||= mapping.all_attrs + unless collection_name == "all" + @all_attrs['collection'] = {identifier: [collection_name]} + end + return @all_attrs + end + + end + end +end diff --git a/app/models/bulkrax/factories/application_factory.rb b/app/models/bulkrax/factories/application_factory.rb new file mode 100644 index 00000000..e381f8e8 --- /dev/null +++ b/app/models/bulkrax/factories/application_factory.rb @@ -0,0 +1,20 @@ +module 
Bulkrax + module Factories + class ApplicationFactory + extend ActiveSupport::Autoload + + eager_autoload do + autoload :CollectionFactory + autoload :ETDFactory + autoload :ImageFactory + autoload :ObjectFactory + autoload :WithAssociatedCollection + end + + # @param [#to_s] First (Xxx) portion of an "XxxFactory" constant + def self.for(model_name) + const_get "Bulkrax::Factories::#{model_name}Factory" + end + end + end +end diff --git a/app/models/bulkrax/factories/collection_factory.rb b/app/models/bulkrax/factories/collection_factory.rb new file mode 100644 index 00000000..bc4a23df --- /dev/null +++ b/app/models/bulkrax/factories/collection_factory.rb @@ -0,0 +1,23 @@ +module Bulkrax + module Factories + class CollectionFactory < ObjectFactory + self.klass = Collection + self.system_identifier_field = :identifier + + def find_or_create + collection = find + return collection if collection + run(&:save!) + end + + def update + raise "Collection doesn't exist" unless object + object.attributes = update_attributes + run_callbacks(:save) do + object.save! + end + log_updated(object) + end + end + end +end diff --git a/app/models/bulkrax/factories/etd_factory.rb b/app/models/bulkrax/factories/etd_factory.rb new file mode 100644 index 00000000..79675d86 --- /dev/null +++ b/app/models/bulkrax/factories/etd_factory.rb @@ -0,0 +1,16 @@ +module Bulkrax + module Factories + class ETDFactory < ObjectFactory + include WithAssociatedCollection + + self.klass = Work + # A way to identify objects that are not Hydra minted identifiers + self.system_identifier_field = 'identifier' + + # TODO: add resource type? 
+ # def create_attributes + # #super.merge(resource_type: 'ETD') + # end + end + end +end diff --git a/app/models/bulkrax/factories/image_factory.rb b/app/models/bulkrax/factories/image_factory.rb new file mode 100644 index 00000000..88208e35 --- /dev/null +++ b/app/models/bulkrax/factories/image_factory.rb @@ -0,0 +1,16 @@ +module Bulkrax + module Factories + class ImageFactory < ObjectFactory + include WithAssociatedCollection + + self.klass = Image + # A way to identify objects that are not Hydra minted identifiers + self.system_identifier_field = :identifier + + # TODO: add resource type? + # def create_attributes + # #super.merge(resource_type: 'Image') + # end + end + end +end diff --git a/app/models/bulkrax/factories/object_factory.rb b/app/models/bulkrax/factories/object_factory.rb new file mode 100644 index 00000000..1fd9ff50 --- /dev/null +++ b/app/models/bulkrax/factories/object_factory.rb @@ -0,0 +1,156 @@ +# TODO require 'importer/log_subscriber' +module Bulkrax + module Factories + class ObjectFactory + extend ActiveModel::Callbacks + define_model_callbacks :save, :create + class_attribute :klass, :system_identifier_field + attr_reader :attributes, :files_directory, :object, :files + + def initialize(attributes, files_dir = nil, files = [], user = nil) + @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes) + @files_directory = files_dir + @files = files + @user = user || User.batch_user + end + + def run + arg_hash = { id: attributes[:id], name: 'UPDATE', klass: klass } + @object = find + if @object + ActiveSupport::Notifications.instrument('import.importer', arg_hash) { update } + else + ActiveSupport::Notifications.instrument('import.importer', arg_hash.merge(name: 'CREATE')) { create } + end + yield(object) if block_given? 
+ object + end + + def update + raise "Object doesn't exist" unless object + run_callbacks(:save) do + work_actor.update(environment(update_attributes)) + end + log_updated(object) + end + + def create_attributes + transform_attributes + end + + def update_attributes + transform_attributes.except(:id) + end + + def find + return find_by_id if attributes[:id] + return search_by_identifier if attributes[system_identifier_field].present? + + raise "Missing identifier: Unable to search for existing object without " \ + "either fedora ID or #{system_identifier_field}" + end + + def find_by_id + klass.find(attributes[:id]) if klass.exists?(attributes[:id]) + end + + def search_by_identifier + query = { system_identifier_field => + attributes[system_identifier_field] } + klass.where(query).first + end + + # An ActiveFedora bug when there are many habtm <-> has_many associations means they won't all get saved. + # https://github.com/projecthydra/active_fedora/issues/874 + # 2+ years later, still open! + def create + attrs = create_attributes + @object = klass.new + run_callbacks :save do + run_callbacks :create do + Rails.logger.debug("============= 6") + klass == Collection ? 
create_collection(attrs) : work_actor.create(environment(attrs)) + Rails.logger.debug("============= 7") + + end + end + log_created(object) + end + + def log_created(obj) + msg = "Created #{klass.model_name.human} #{obj.id}" + Rails.logger.info("#{msg} (#{Array(attributes[system_identifier_field]).first})") + end + + def log_updated(obj) + msg = "Updated #{klass.model_name.human} #{obj.id}" + Rails.logger.info("#{msg} (#{Array(attributes[system_identifier_field]).first})") + end + + private + + # @param [Hash] attrs the attributes to put in the environment + # @return [Hyrax::Actors::Environment] + def environment(attrs) + Hyrax::Actors::Environment.new(@object, Ability.new(@user), attrs) + end + + def work_actor + Hyrax::CurationConcern.actor + end + + def create_collection(attrs) + Rails.logger.debug("============= 8") + + @object.attributes = attrs + @object.apply_depositor_metadata(@user) + Rails.logger.debug("============= 9") + + @object.save! + end + + # Override if we need to map the attributes from the parser in + # a way that is compatible with how the factory needs them. + def transform_attributes + attributes.slice(*permitted_attributes) + .merge(file_attributes) + end + + # Find existing file or upload new file. This assumes a Work will have unique file titles; + # could filter by URIs instead (slower). + # When an uploaded_file already exists we do not want to pass its id in `file_attributes` + # otherwise it gets reuploaded by `work_actor`. + def upload_ids + work_files_titles = object.file_sets.map(&:title) if object.present? && object.file_sets.present? + work_files_titles && work_files_titles.include?(attributes[:file]) ? [] : [import_file(file_paths.first)] + end + + def file_attributes + hash = {} + hash[:uploaded_files] = upload_ids if files_directory.present? && attributes[:file].present? + hash[:remote_files] = attributes[:remote_files] if attributes[:remote_files].present? 
+ hash + end + + def file_paths + attributes[:file].map { |file_name| File.join(files_directory, file_name) } if attributes[:file] + end + + def import_file(path) + u = Hyrax::UploadedFile.new + u.user_id = @user.id + u.file = CarrierWave::SanitizedFile.new(path) + u.save + u.id + end + + ## TO DO: handle invalid file in CSV + ## currently the importer stops if no file corresponding to a given file_name is found + + # Regardless of what the MODS Parser gives us, these are the properties we are prepared to accept. + def permitted_attributes + klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility] + end + end + end +end diff --git a/app/models/bulkrax/factories/with_associated_collection.rb b/app/models/bulkrax/factories/with_associated_collection.rb new file mode 100644 index 00000000..49ea8c24 --- /dev/null +++ b/app/models/bulkrax/factories/with_associated_collection.rb @@ -0,0 +1,29 @@ +module Bulkrax + module Factories + module WithAssociatedCollection + extend ActiveSupport::Concern + + # Strip out the :collection key, and add the member_of_collection_ids, + # which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor + def create_attributes + return super if attributes[:collection].nil? + super.except(:collection).merge(member_of_collection_ids: [collection.id]) + end + + # Strip out the :collection key, and add the member_of_collection_ids, + # which is used by Hyrax::Actors::AddAsMemberOfCollectionsActor + def update_attributes + return super if attributes[:collection].nil? 
+ super.except(:collection).merge(member_of_collection_ids: [collection.id]) + end + + private + + def collection + @collection ||= CollectionFactory.new(attributes.fetch(:collection)).find_or_create + @collection.reindex_extent = Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX + return @collection + end + end + end +end diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb new file mode 100644 index 00000000..cad8d8a6 --- /dev/null +++ b/app/models/bulkrax/importer.rb @@ -0,0 +1,115 @@ +require 'iso8601' + +module Bulkrax + class Importer < ApplicationRecord + serialize :parser_fields, JSON + serialize :field_mapping, JSON + + belongs_to :user + has_many :importer_runs, dependent: :destroy, foreign_key: 'bulkrax_importer_id' + + validates :name, presence: true + validates :admin_set_id, presence: true + # TODO validates :metadata_prefix, presence: true + # TODO validates :base_url, presence: true + + def parser + # create an parser based on importer + @parser ||= self.parser_klass.constantize.new( + self + # self.file_url, + # # self.right_statement, + # # self.institution_name, + # self.user, + # self.admin_set_id, + # self.parser_fields, + # self.field_mapping + # # self.external_set_id, + ) + end + + def frequency_enums + # these duration values use ISO 8601 Durations (https://en.wikipedia.org/wiki/ISO_8601#Durations) + # TLDR; all durations are prefixed with 'P' and the parts are a number with the type of duration. + # i.e. P1Y2M3W4DT5H6M7S == 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds + [['Daily', 'P1D'], ['Monthly', 'P1M'], ['Yearly', 'P1Y'], ['Once (on save)', 'PT0S']] + end + + def frequency=(frequency) + write_attribute(:frequency, ISO8601::Duration.new(frequency).to_s) + end + + def frequency + ISO8601::Duration.new read_attribute(:frequency) if read_attribute(:frequency) + end + + def schedulable? 
+ frequency.to_seconds != 0 + end + + def next_import_at + (last_imported_at || Time.current) + frequency.to_seconds if schedulable? and last_imported_at.present? + end + + def current_importer_run + @current_importer_run ||= self.importer_runs.create!(total_records: self.limit) + end + + def seen + @seen ||= {} + end + + def import_works + parser.create_collections + parser.records(quick: true).each_with_index do |record, index| + if !limit.nil? && index >= limit + break + elsif record.deleted? # TODO record.status == "deleted" + self.current_importer_run.deleted_records += 1 + else + seen[record.identifier] = true + ImportWorkJob.perform_later(self.id, record.identifier) + self.increment_counters(index) + end + current_importer_run.save + end + + remove_unseen + end + + def remove_unseen + # TODO + # if primary_collection + # primary_collection.member_ids.each do |id| + # w = Work.find id + # unless seen[w.source[0]] + # if w.in_collections.size > 1 + # primary_collection.members.delete w # only removes from primary collection - wants the record, not the id + # primary_collection.save + # else + # w.delete # removes from all collections + # end + # end + # end + # end + end + + def import_work(identifier) + entry = parser.entry(identifier) + entry.build + end + + def increment_counters(index) + if limit.to_i > 0 + current_importer_run.total_records = limit + elsif parser.total > 0 + current_importer_run.total_records = parser.total + else + current_importer_run.total_records = index + 1 + end + current_importer_run.enqueued_records = index + 1 + current_importer_run.save! 
+ end + + end +end diff --git a/app/models/bulkrax/importer_run.rb b/app/models/bulkrax/importer_run.rb new file mode 100644 index 00000000..bbd3626f --- /dev/null +++ b/app/models/bulkrax/importer_run.rb @@ -0,0 +1,5 @@ +module Bulkrax + class ImporterRun < ApplicationRecord + belongs_to :importer, foreign_key: 'bulkrax_importer_id' + end +end diff --git a/app/models/bulkrax/mappings/application_mapping.rb b/app/models/bulkrax/mappings/application_mapping.rb new file mode 100644 index 00000000..8cea050d --- /dev/null +++ b/app/models/bulkrax/mappings/application_mapping.rb @@ -0,0 +1,110 @@ +require 'language_list' +require 'erb' +require 'ostruct' + +module Bulkrax + module Mappings + class ApplicationMapping + attr_accessor :record, :rights_statement, :contributing_institution, :thumbnail_url, :all + class_attribute :matchers + + def initialize(record, rights_statement, contributing_institution, thumbnail_url, all = false) + @record = record.record + @rights_statement = rights_statement + @contributing_institution = contributing_institution + @thumbnail_url = thumbnail_url + @all = all + end + + def self.matcher(name, args={}) + self.matchers ||= {} + from = args[:from] || [name] + + matcher = matcher_class.new( + to: name, + from: from, + parsed: args[:parsed], + split: args[:split], + if: args[:if] + ) + + from.each do |lookup| + self.matchers[lookup] = matcher + end + end + + def metadata + return @metadata if @metadata + + @metadata = {} + record.metadata.children.each do |child| + child.children.each do |node| + add_metadata(node.name, node.content) + end + end + +# TODO go through all parer_fields and add them? 
+ add_metadata('thumbnail_url', thumbnail_url) + + @metadata['contributing_institution'] = [contributing_institution] + @metadata['rights_statement'] = [rights_statement] + @metadata['visibility'] = 'open' + + @metadata + end + + def add_metadata(node_name, node_content) + matcher = self.class.matchers[node_name] + + if matcher + result = matcher.result(self, node_content) + if result + key = matcher.to + @metadata[key] ||= [] + + if result.is_a?(Array) + @metadata[key] += result + else + @metadata[key] << result + end + end + end + end + + def all_attrs + merge_attrs(header, metadata) + end + + def context + @context ||= OpenStruct.new(record: record, identifier: record.header.identifier) + end + + def thumbnail_url + ERB.new(@thumbnail_url).result(context.instance_eval { binding }) + end + + def header + { + 'source' => [record.header.identifier] + } + end + + def merge_attrs(first, second) + return first if second.blank? + + first = {} if first.blank? + + first.merge(second) do |key, old, new| + if key =~ /identifier/ + merged_value = old if old.first =~ /^http/ + merged_value = new if new.first =~ /^http/ + else + merged_value = old + new + end + merged_value + end + end + + end + end +end diff --git a/app/models/bulkrax/mappings/oai_mapping.rb b/app/models/bulkrax/mappings/oai_mapping.rb new file mode 100644 index 00000000..19638011 --- /dev/null +++ b/app/models/bulkrax/mappings/oai_mapping.rb @@ -0,0 +1,24 @@ +module Bulkrax + module Mappings + class OaiMapping < ApplicationMapping + def self.matcher_class + Matchers::OaiMatcher + end + + matcher 'contributor', split: true + matcher 'creator', split: true + matcher 'date', from: ['date'], split: true + matcher 'description' + matcher 'format_digital', from: ['format_digital', 'format'], parsed: true + matcher 'identifier', from: ['identifier'], if: ->(parser, content) { content.match(/http(s{0,1}):\/\//) } + matcher 'language', parsed: true, split: true + matcher 'place', from: ['coverage'] + matcher 
'publisher', split: /\s*[;]\s*/ + matcher 'relation', split: true + matcher 'subject', split: true + matcher 'title' + matcher 'types', from: ['types', 'type'], split: true, parsed: true + matcher 'remote_files', from: ['thumbnail_url'], parsed: true + end + end +end diff --git a/app/models/bulkrax/matchers/application_matcher.rb b/app/models/bulkrax/matchers/application_matcher.rb new file mode 100644 index 00000000..39400fd2 --- /dev/null +++ b/app/models/bulkrax/matchers/application_matcher.rb @@ -0,0 +1,39 @@ +module Bulkrax + module Matchers + class ApplicationMatcher + attr_accessor :to, :from, :parsed, :if, :split + + def initialize(args) + args.each do |k, v| + send("#{k}=", v) + end + end + + def result(parser, content) + return nil if self.if && !self.if.call(parser, content) + + @result = content.gsub(/\s/, ' ') # remove any line feeds and tabs + + if self.split.is_a?(Regexp) + @result = @result.split(self.split) + elsif self.split + @result = @result.split(/\s*[:;|]\s*/) # default split by : ; | + end + + if @result.is_a?(Array) && @result.size == 1 + @result = @result[0] + end + + if @result.is_a?(Array) && self.parsed + @result.each_with_index do |res, index| + @result[index] = send("parse_#{to}", res) + end + elsif self.parsed + @result = send("parse_#{to}", @result) + end + + return @result + end + end + end +end diff --git a/app/models/bulkrax/matchers/oai_matcher.rb b/app/models/bulkrax/matchers/oai_matcher.rb new file mode 100644 index 00000000..71dea4a4 --- /dev/null +++ b/app/models/bulkrax/matchers/oai_matcher.rb @@ -0,0 +1,67 @@ +module Bulkrax + module Matchers + class OaiMatcher < ApplicationMatcher + def parse_remote_files(src) + {url: src} + end + + def parse_language(src) + l = LanguageList::LanguageInfo.find(src) + return l ? 
l.name : src + end + + def parse_types(src) + src.to_s.titleize + end + + def parse_format_original(src) + src.to_s.titleize + end + + def parse_format_digital(src) + case src + when 'application/pdf','pdf', 'PDF' + 'PDF' + when 'image/jpeg', 'image/jpg', 'jpeg', 'jpg', 'JPEG', 'JPG' + 'JPEG' + when 'image/tiff', 'image/tif', 'tiff', 'tif', 'TIFF', 'TIF' + 'TIFF' + when 'image/jp2', 'jp2', 'JP2' + 'JP2' + when 'image/png', 'png', 'PNG' + 'PNG' + when 'image/gif', 'gif', 'GIF' + 'GIF' + when 'video/mp4', 'mp4', 'MP4' + 'MP4' + when 'video/ogg', 'ogg', 'OGG' + 'OGG' + when 'video/vnd.avi', 'video/avi', 'avi', 'AVI' + 'AVI' + when 'audio/aac', 'aac', 'AAC' + 'AAC' + when 'audio/mp4', 'mp4', 'MP4' + 'MP4' + when 'audio/mpeg', 'audio/mp3', 'audio/mpeg3', 'mpeg', 'MPEG', 'mp3', 'MP3', 'mpeg3', 'MPEG3' + 'MPEG' + when 'audio/ogg', 'ogg', 'OGG' + 'OGG' + when 'audio/aiff', 'aiff', 'AIFF' + 'AIFF' + when 'audio/webm', 'webm', 'WEBM' + 'WEBM' + when 'audio/wav', 'wav', 'WAV' + 'WAV' + when 'text/csv', 'csv', 'CSV' + 'CSV' + when 'text/html', 'html', 'HTML' + 'HTML' + when 'text/rtf', 'rtf', 'RTF' + 'RTF' + else + src.to_s.titleize + end + end + end + end +end diff --git a/app/models/bulkrax/parsers/application_parser.rb b/app/models/bulkrax/parsers/application_parser.rb new file mode 100644 index 00000000..65df0ccd --- /dev/null +++ b/app/models/bulkrax/parsers/application_parser.rb @@ -0,0 +1,45 @@ +module Bulkrax + module Parsers + class ApplicationParser + + #attr_accessor :url, :headers, :file_url, :user, :admin_set_id, :rights, :institution, :total, :client, :collection_name, :metadata_prefix + attr_accessor :importer, :total + + def self.parser_fields + {} + end + + def initialize(importer) + @importer = importer + end + + # @api + def entry_class + raise 'must be defined' + end + + # @api + def mapping_class + raise 'must be defined' + end + + # @api + def records(opts = {}) + raise 'must be defined' + end + + def record(identifier, opts = {}) + return @record if 
# Fetches data from the OAI endpoint through +client+.
#
# @param opts [Hash] options forwarded to the OAI client. The +:quick+ key is
#   consumed here and never forwarded; the caller's hash is left untouched.
# @return the client's +list_identifiers+ response when +opts[:quick]+ is
#   truthy (also stored in @short_records); otherwise the +list_records+
#   response, which is memoized in @records after the first call
def records(opts = {})
  # Build a copy without :quick instead of deleting the key from the
  # caller's hash, which the previous implementation did.
  request = opts.reject { |key, _value| key == :quick }

  if opts[:quick]
    @short_records = client.list_identifiers(request)
  else
    @records ||= client.list_records(request)
  end
end
||= records(quick: true).doc.find(".//resumptionToken").to_a.first.attributes["completeListSize"].to_i + rescue + @total = 0 + end + + end + end +end diff --git a/app/views/bulkrax/importers/_form.html.erb b/app/views/bulkrax/importers/_form.html.erb new file mode 100644 index 00000000..dba30658 --- /dev/null +++ b/app/views/bulkrax/importers/_form.html.erb @@ -0,0 +1,55 @@ +
+ <% if importer.errors.any? %> +
+

<%= pluralize(importer.errors.count, "error") %> prohibited this importer from being saved:

+ + +
+ <% end %> + + <%= form.input :name %> + + <%= form.input :admin_set_id, collection: available_admin_sets %> + <%= form.hidden_field :user_id, value: current_user.id %> + + <%= form.input :frequency, collection: form.object.frequency_enums %> + + <%= form.input :limit, as: :integer, hint: 'leave blank or 0 for all records' %> + + <%= form.input :parser_klass, collection: [["OAI", "Bulkrax::Parsers::OaiParser"]], label: "Parser" %> + + <%= form.fields_for :parser_fields do |fi| %> + <%= fi.input :base_url, as: :string %> + <%= fi.input :metadata_prefix, as: :string, hint: 'Such as oai_dc, dcterms or oai_qdc' %> + <%= fi.input :set, collection: [], label: 'Set (source)' %> + + + <%= fi.input :institution_name, as: :string %> + <% rights_statements = Hyrax.config.rights_statement_service_class.new %> + <%= fi.input :rights_statement, + collection: rights_statements.select_active_options, + include_blank: true, + item_helper: rights_statements.method(:include_current_value), + input_html: { class: 'form-control' } %> + <%= fi.input :thumbnail_url, as: :string %> +
+

+ The Thumbnail URL allows for basic templating and substitution of any identified information into the URL. For example: +

+ +

+ http://commons.ptsem.edu/?cover=<%= identifier.split(':').last %>&size=L +

+ +

+ http://commons.ptsem.edu/?cover=<%= record.header.identifier.split(':').last %>&size=L +

+
+ + <% end %> + +
diff --git a/app/views/bulkrax/importers/edit.html.erb b/app/views/bulkrax/importers/edit.html.erb new file mode 100644 index 00000000..7eeb0174 --- /dev/null +++ b/app/views/bulkrax/importers/edit.html.erb @@ -0,0 +1,6 @@ +

Editing Importer

+ +<%= render 'form', importer: @importer %> + +<%= link_to 'Show', @importer %> | +<%= link_to 'Back', importers_path %> diff --git a/app/views/bulkrax/importers/index.html.erb b/app/views/bulkrax/importers/index.html.erb new file mode 100644 index 00000000..926c288d --- /dev/null +++ b/app/views/bulkrax/importers/index.html.erb @@ -0,0 +1,50 @@ +<% provide :page_header do %> +

Importers

+
+ <%= link_to new_importer_path, class: 'btn btn-primary' do %> + <%= t(:'helpers.action.importer.new') %> + <% end %> +
+<% end %> + +
+
+ <% if @importers.present? %> +
+ + + + + + + + + + + + + + + <% @importers.each do |importer| %> + + + + + + + + + + + + + <% end %> + +
NameLast RunNext RunRecords EnqueuedRecords ProcessedRecords FailedRecords Deleted UpstreamTotal Records
<%= importer.name %><%= importer.last_imported_at.strftime("%b %d, %Y") if importer.last_imported_at %><%= importer.next_import_at.strftime("%b %d, %Y") if importer.next_import_at %><%= importer.import_runs.last&.enqueued %><%= importer.import_runs.last&.processed %><%= importer.import_runs.last&.failures %><%= importer.import_runs.last&.deleted %><%= importer.import_runs.last&.total %><%= link_to raw(''), edit_importer_path(importer) %><%= link_to raw(''), importer, method: :delete, data: { confirm: 'Are you sure?' } %>
+
+ <% else %> +

No importers have been created.

+ <% end %> +
+
+ diff --git a/app/views/bulkrax/importers/new.html.erb b/app/views/bulkrax/importers/new.html.erb new file mode 100644 index 00000000..0769eb9e --- /dev/null +++ b/app/views/bulkrax/importers/new.html.erb @@ -0,0 +1,21 @@ +<% provide :page_header do %> +

New Importer

+<% end %> + +
+
+
+ <%= simple_form_for @importer do |form| %> + <%= render 'form', importer: @importer, form: form %> + + <% end %> +
+
+
diff --git a/app/views/bulkrax/importers/show.html.erb b/app/views/bulkrax/importers/show.html.erb new file mode 100644 index 00000000..e295df2f --- /dev/null +++ b/app/views/bulkrax/importers/show.html.erb @@ -0,0 +1,44 @@ +

<%= notice %>

+ +

+ Name: + <%= @importer.name %> +

+ +

+ Admin set: + <%= @importer.admin_set_id %> +

+ +

+ User: + <%= @importer.user %> +

+ +

+ Frequency: + <%= @importer.frequency %> +

+ +

+ Parser klass: + <%= @importer.parser_klass %> +

+ +

+ Limit: + <%= @importer.limit %> +

+ +

+ Parser fields: + <%= @importer.parser_fields %> +

+ +

+ Field mapping: + <%= @importer.field_mapping %> +

+ +<%= link_to 'Edit', edit_importer_path(@importer) %> | +<%= link_to 'Back', importers_path %> diff --git a/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb b/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb new file mode 100644 index 00000000..9641191f --- /dev/null +++ b/app/views/hyrax/dashboard/sidebar/_repository_content.html.erb @@ -0,0 +1,15 @@ +
  • <%= t('hyrax.admin.sidebar.repository_objects') %>
  • + +<%= menu.nav_link(hyrax.my_collections_path, + also_active_for: hyrax.dashboard_collections_path) do %> + <%= t('hyrax.admin.sidebar.collections') %> +<% end %> + +<%= menu.nav_link(hyrax.my_works_path, + also_active_for: hyrax.dashboard_works_path) do %> + <%= t('hyrax.admin.sidebar.works') %> +<% end %> + +<%= menu.nav_link(bulkrax.importers_path) do %> + <%= t('bulkrax.admin.sidebar.importers') %> +<% end %> diff --git a/bulkrax.gemspec b/bulkrax.gemspec index ba6f55ef..b1d70849 100644 --- a/bulkrax.gemspec +++ b/bulkrax.gemspec @@ -9,14 +9,19 @@ Gem::Specification.new do |s| s.version = Bulkrax::VERSION s.authors = ["Rob Kaufman"] s.email = ["rob@notch8.com"] - s.homepage = "TODO" - s.summary = "TODO: Summary of Bulkrax." - s.description = "TODO: Description of Bulkrax." + s.homepage = "https://github.com/samvera-labs/bulkrax" + s.summary = "Summary of Bulkrax." + s.description = "Description of Bulkrax." s.license = "MIT" s.files = Dir["{app,config,db,lib}/**/*", "MIT-LICENSE", "Rakefile", "README.md"] s.add_dependency "rails", "~> 5.1.6" + s.add_dependency 'iso8601', '~> 0.9.0' + s.add_dependency 'oai', '~> 0.4' + s.add_dependency 'libxml-ruby', '~> 3.1.0' + s.add_dependency 'simple_form', '~> 3.2', '<= 3.5.0' + s.add_dependency 'language_list', '~> 1.2', '>= 1.2.1' s.add_development_dependency "sqlite3" end diff --git a/config/routes.rb b/config/routes.rb index 6a802500..647fba88 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,2 +1,7 @@ Bulkrax::Engine.routes.draw do + resources :importers do + collection do + post :external_sets + end + end end diff --git a/db/migrate/20181011230201_create_bulkrax_importers.rb b/db/migrate/20181011230201_create_bulkrax_importers.rb new file mode 100644 index 00000000..d68060b5 --- /dev/null +++ b/db/migrate/20181011230201_create_bulkrax_importers.rb @@ -0,0 +1,16 @@ +class CreateBulkraxImporters < ActiveRecord::Migration[5.1] + def change + create_table :bulkrax_importers do |t| + t.string :name + t.string 
module Bulkrax
  # Rails engine definition for Bulkrax.
  #
  # Besides namespace isolation and generator defaults, it reorders the
  # controller view paths after boot so this engine's views are looked up
  # ahead of the Hyrax gem's views.
  class Engine < ::Rails::Engine
    isolate_namespace Bulkrax

    config.generators do |g|
      g.test_framework :rspec
      g.fixture_replacement :factory_bot, dir: 'spec/factories'
    end

    config.after_initialize do
      engine_views = "#{Bulkrax::Engine.root}/app/views"
      paths = ActionController::Base.view_paths.map(&:to_s)
      # Hyrax is located by the "/hyrax-" fragment of its install directory.
      hyrax_index = paths.index { |path| path.include?('/hyrax-') }

      # When no such path exists there is nothing to get ahead of; the former
      # code called insert(nil, ...) here and raised TypeError during boot.
      if hyrax_index
        paths.insert(hyrax_index, engine_views)
        ActionController::Base.view_paths = paths
      end
    end
  end
end
+# It demonstrates how one might use RSpec to specify the controller code that +# was generated by Rails when you ran the scaffold generator. +# +# It assumes that the implementation code is generated by the rails scaffold +# generator. If you are using any extension libraries to generate different +# controller code, this generated spec may or may not pass. +# +# It only uses APIs available in rails and/or rspec-rails. There are a number +# of tools you can use to make these specs even more expressive, but we're +# sticking to rails and rspec-rails APIs to keep things simple and stable. +# +# Compared to earlier versions of this generator, there is very limited use of +# stubs and message expectations in this spec. Stubs are only used when there +# is no simpler way to get a handle on the object needed for the example. +# Message expectations are only used when there is no simpler way to specify +# that an instance is receiving a specific message. +# +# Also compared to earlier versions of this generator, there are no longer any +# expectations of assigns and templates rendered. These features have been +# removed from Rails core in Rails 5, but can be added back in via the +# `rails-controller-testing` gem. + +module Bulkrax + RSpec.describe ImportersController, type: :controller do + + # This should return the minimal set of attributes required to create a valid + # Importer. As you add validations to Importer, be sure to + # adjust the attributes here as well. + let(:valid_attributes) { + skip("Add a hash of attributes valid for your model") + } + + let(:invalid_attributes) { + skip("Add a hash of attributes invalid for your model") + } + + # This should return the minimal set of values that should be in the session + # in order to pass any filters (e.g. authentication) defined in + # ImportersController. Be sure to keep this updated too. + let(:valid_session) { {} } + + describe "GET #index" do + it "returns a success response" do + Importer.create! 
valid_attributes + get :index, params: {}, session: valid_session + expect(response).to be_successful + end + end + + describe "GET #show" do + it "returns a success response" do + importer = Importer.create! valid_attributes + get :show, params: {id: importer.to_param}, session: valid_session + expect(response).to be_successful + end + end + + describe "GET #new" do + it "returns a success response" do + get :new, params: {}, session: valid_session + expect(response).to be_successful + end + end + + describe "GET #edit" do + it "returns a success response" do + importer = Importer.create! valid_attributes + get :edit, params: {id: importer.to_param}, session: valid_session + expect(response).to be_successful + end + end + + describe "POST #create" do + context "with valid params" do + it "creates a new Importer" do + expect { + post :create, params: {importer: valid_attributes}, session: valid_session + }.to change(Importer, :count).by(1) + end + + it "redirects to the created importer" do + post :create, params: {importer: valid_attributes}, session: valid_session + expect(response).to redirect_to(Importer.last) + end + end + + context "with invalid params" do + it "returns a success response (i.e. to display the 'new' template)" do + post :create, params: {importer: invalid_attributes}, session: valid_session + expect(response).to be_successful + end + end + end + + describe "PUT #update" do + context "with valid params" do + let(:new_attributes) { + skip("Add a hash of attributes valid for your model") + } + + it "updates the requested importer" do + importer = Importer.create! valid_attributes + put :update, params: {id: importer.to_param, importer: new_attributes}, session: valid_session + importer.reload + skip("Add assertions for updated state") + end + + it "redirects to the importer" do + importer = Importer.create! 
valid_attributes + put :update, params: {id: importer.to_param, importer: valid_attributes}, session: valid_session + expect(response).to redirect_to(importer) + end + end + + context "with invalid params" do + it "returns a success response (i.e. to display the 'edit' template)" do + importer = Importer.create! valid_attributes + put :update, params: {id: importer.to_param, importer: invalid_attributes}, session: valid_session + expect(response).to be_successful + end + end + end + + describe "DELETE #destroy" do + it "destroys the requested importer" do + importer = Importer.create! valid_attributes + expect { + delete :destroy, params: {id: importer.to_param}, session: valid_session + }.to change(Importer, :count).by(-1) + end + + it "redirects to the importers list" do + importer = Importer.create! valid_attributes + delete :destroy, params: {id: importer.to_param}, session: valid_session + expect(response).to redirect_to(importers_url) + end + end + + end +end diff --git a/spec/factories/bulkrax_importer_runs.rb b/spec/factories/bulkrax_importer_runs.rb new file mode 100644 index 00000000..5e98712e --- /dev/null +++ b/spec/factories/bulkrax_importer_runs.rb @@ -0,0 +1,10 @@ +FactoryBot.define do + factory :bulkrax_importer_run, class: 'Bulkrax::ImporterRun' do + importer { nil } + total_records { 1 } + enqueued_records { 1 } + processed_records { 1 } + deleted_records { 1 } + failed_records { 1 } + end +end diff --git a/spec/factories/bulkrax_importers.rb b/spec/factories/bulkrax_importers.rb new file mode 100644 index 00000000..4944426c --- /dev/null +++ b/spec/factories/bulkrax_importers.rb @@ -0,0 +1,28 @@ +FactoryBot.define do + factory :bulkrax_importer, class: 'Bulkrax::Importer' do + name { "MyString" } + admin_set_id { "MyString" } + user { nil } + frequency { "MyString" } + parser_klass { "MyString" } + limit { 1 } + parser_fields { "" } + field_mapping { "" } + end + + factory :bulkrax_importer_oai, class: 'Bulkrax::Importer' do + name { "Oai 
Collection" } + admin_set_id { "MyString" } + user { nil } + frequency { "PT0S" } + parser_klass { "Bulkrax::Parsers::OaiParser" } + limit { 10 } + parser_fields { { + 'base_url' => "http://commons.ptsem.edu/api/oai-pmh", + 'metadata_prefix' => 'oai_dc' + } + } + field_mapping { "" } + end + +end diff --git a/spec/features/oai_dc_importer_spec.rb b/spec/features/oai_dc_importer_spec.rb new file mode 100644 index 00000000..e6c9f242 --- /dev/null +++ b/spec/features/oai_dc_importer_spec.rb @@ -0,0 +1,17 @@ +require 'rails_helper' + +module Bulkrax + RSpec.describe 'Importing an oai feed' do + let(:importer) { + f = FactoryBot.build(:bulkrax_importer_oai) + f.user = User.new(email: 'test@example.com') + f.save + f + } + + it 'should create a work' do + importer.import_works + end + end +end + diff --git a/spec/helpers/bulkrax/importers_helper_spec.rb b/spec/helpers/bulkrax/importers_helper_spec.rb new file mode 100644 index 00000000..30692570 --- /dev/null +++ b/spec/helpers/bulkrax/importers_helper_spec.rb @@ -0,0 +1,17 @@ +require 'rails_helper' + +# Specs in this file have access to a helper object that includes +# the ImportersHelper. 
For example: +# +# describe ImportersHelper do +# describe "string concat" do +# it "concats two strings with spaces" do +# expect(helper.concat_strings("this","that")).to eq("this that") +# end +# end +# end +module Bulkrax + RSpec.describe ImportersHelper, type: :helper do + pending "add some examples to (or delete) #{__FILE__}" + end +end diff --git a/spec/jobs/bulkrax/import_work_job_spec.rb b/spec/jobs/bulkrax/import_work_job_spec.rb new file mode 100644 index 00000000..29379311 --- /dev/null +++ b/spec/jobs/bulkrax/import_work_job_spec.rb @@ -0,0 +1,7 @@ +require 'rails_helper' + +module Bulkrax + RSpec.describe ImportWorkJob, type: :job do + pending "add some examples to (or delete) #{__FILE__}" + end +end diff --git a/spec/jobs/bulkrax/importer_job_spec.rb b/spec/jobs/bulkrax/importer_job_spec.rb new file mode 100644 index 00000000..37ed2fb3 --- /dev/null +++ b/spec/jobs/bulkrax/importer_job_spec.rb @@ -0,0 +1,7 @@ +require 'rails_helper' + +module Bulkrax + RSpec.describe ImporterJob, type: :job do + pending "add some examples to (or delete) #{__FILE__}" + end +end diff --git a/spec/models/bulkrax/importer_run_spec.rb b/spec/models/bulkrax/importer_run_spec.rb new file mode 100644 index 00000000..68a98611 --- /dev/null +++ b/spec/models/bulkrax/importer_run_spec.rb @@ -0,0 +1,7 @@ +require 'rails_helper' + +module Bulkrax + RSpec.describe ImporterRun, type: :model do + pending "add some examples to (or delete) #{__FILE__}" + end +end diff --git a/spec/models/bulkrax/importer_spec.rb b/spec/models/bulkrax/importer_spec.rb new file mode 100644 index 00000000..35722d77 --- /dev/null +++ b/spec/models/bulkrax/importer_spec.rb @@ -0,0 +1,7 @@ +require 'rails_helper' + +module Bulkrax + RSpec.describe Importer, type: :model do + pending "add some examples to (or delete) #{__FILE__}" + end +end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb new file mode 100644 index 00000000..102299f9 --- /dev/null +++ b/spec/rails_helper.rb @@ -0,0 +1,68 @@ +# This 
file is copied to spec/ when you run 'rails generate rspec:install' +require 'spec_helper' +ENV['RAILS_ENV'] ||= 'test' + +require File.expand_path("../../spec/test_app/config/environment", __FILE__) +ENGINE_RAILS_ROOT = File.join(File.dirname(__FILE__), '../') +# Prevent database truncation if the environment is production +abort("The Rails environment is running in production mode!") if Rails.env.production? +require 'rspec/rails' +# Add additional requires below this line. Rails is not loaded until this point! + +FactoryBot.definition_file_paths << File.join(File.dirname(__FILE__), 'factories') +FactoryBot.find_definitions + +# Requires supporting ruby files with custom matchers and macros, etc, in +# spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are +# run as spec files by default. This means that files in spec/support that end +# in _spec.rb will both be required and run as specs, causing the specs to be +# run twice. It is recommended that you do not name files matching this glob to +# end with _spec.rb. You can configure this pattern with the --pattern +# option on the command line or in ~/.rspec, .rspec or `.rspec-local`. +# +# The following line is provided for convenience purposes. It has the downside +# of increasing the boot-up time by auto-requiring all files in the support +# directory. Alternatively, in the individual `*_spec.rb` files, manually +# require only the support files necessary. +# +# Dir[Rails.root.join('spec', 'support', '**', '*.rb')].each { |f| require f } + +# Checks for pending migrations and applies them before tests are run. +# If you are not using ActiveRecord, you can remove these lines. +begin + ActiveRecord::Migration.maintain_test_schema! 
+rescue ActiveRecord::PendingMigrationError => e + puts e.to_s.strip + exit 1 +end +RSpec.configure do |config| + config.include FactoryBot::Syntax::Methods + + # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures + config.fixture_path = "#{::Rails.root}/spec/fixtures" + + # If you're not using ActiveRecord, or you'd prefer not to run each of your + # examples within a transaction, remove the following line or assign false + # instead of true. + config.use_transactional_fixtures = true + + # RSpec Rails can automatically mix in different behaviours to your tests + # based on their file location, for example enabling you to call `get` and + # `post` in specs under `spec/controllers`. + # + # You can disable this behaviour by removing the line below, and instead + # explicitly tag your specs with their type, e.g.: + # + # RSpec.describe UsersController, :type => :controller do + # # ... + # end + # + # The different available types are documented in the features, such as in + # https://relishapp.com/rspec/rspec-rails/docs + config.infer_spec_type_from_file_location! + + # Filter lines from Rails gems in backtraces. + config.filter_rails_from_backtrace! 
+ # arbitrary gems may also be filtered via: + # config.filter_gems_from_backtrace("gem name") +end diff --git a/spec/routing/bulkrax/importers_routing_spec.rb b/spec/routing/bulkrax/importers_routing_spec.rb new file mode 100644 index 00000000..3a574034 --- /dev/null +++ b/spec/routing/bulkrax/importers_routing_spec.rb @@ -0,0 +1,40 @@ +require "rails_helper" + +module Bulkrax + RSpec.describe ImportersController, type: :routing do + describe "routing" do + it "routes to #index" do + expect(:get => "/importers").to route_to("importers#index") + end + + it "routes to #new" do + expect(:get => "/importers/new").to route_to("importers#new") + end + + it "routes to #show" do + expect(:get => "/importers/1").to route_to("importers#show", :id => "1") + end + + it "routes to #edit" do + expect(:get => "/importers/1/edit").to route_to("importers#edit", :id => "1") + end + + + it "routes to #create" do + expect(:post => "/importers").to route_to("importers#create") + end + + it "routes to #update via PUT" do + expect(:put => "/importers/1").to route_to("importers#update", :id => "1") + end + + it "routes to #update via PATCH" do + expect(:patch => "/importers/1").to route_to("importers#update", :id => "1") + end + + it "routes to #destroy" do + expect(:delete => "/importers/1").to route_to("importers#destroy", :id => "1") + end + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..ce33d66d --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,96 @@ +# This file was generated by the `rails generate rspec:install` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. 
Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. + mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. 
+ config.shared_context_metadata_behavior = :apply_to_host_groups + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/ + # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/ + # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode + config.disable_monkey_patching! + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). + config.default_formatter = "doc" + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. 
If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed +=end +end diff --git a/spec/test_app/app/models/user.rb b/spec/test_app/app/models/user.rb new file mode 100644 index 00000000..379658a5 --- /dev/null +++ b/spec/test_app/app/models/user.rb @@ -0,0 +1,2 @@ +class User < ApplicationRecord +end diff --git a/spec/test_app/db/migrate/20181013023857_create_users.rb b/spec/test_app/db/migrate/20181013023857_create_users.rb new file mode 100644 index 00000000..08d86d80 --- /dev/null +++ b/spec/test_app/db/migrate/20181013023857_create_users.rb @@ -0,0 +1,9 @@ +class CreateUsers < ActiveRecord::Migration[5.1] + def change + create_table :users do |t| + t.string :email + + t.timestamps + end + end +end diff --git a/spec/test_app/db/schema.rb b/spec/test_app/db/schema.rb new file mode 100644 index 00000000..511d7c91 --- /dev/null +++ b/spec/test_app/db/schema.rb @@ -0,0 +1,47 @@ +# This file is auto-generated from the current state of the database. Instead +# of editing this file, please use the migrations feature of Active Record to +# incrementally modify your database, and then regenerate this schema definition. +# +# Note that this schema.rb definition is the authoritative source for your +# database schema. If you need to create the application database on another +# system, you should be using db:schema:load, not running all the migrations +# from scratch. The latter is a flawed and unsustainable approach (the more migrations +# you'll amass, the slower it'll run and the greater likelihood for issues). 
+# +# It's strongly recommended that you check this file into your version control system. + +ActiveRecord::Schema.define(version: 20181013023857) do + + create_table "bulkrax_importer_runs", force: :cascade do |t| + t.integer "importer_id" + t.integer "total_records", default: 0 + t.integer "enqueued_records", default: 0 + t.integer "processed_records", default: 0 + t.integer "deleted_records", default: 0 + t.integer "failed_records", default: 0 + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["importer_id"], name: "index_bulkrax_importer_runs_on_importer_id" + end + + create_table "bulkrax_importers", force: :cascade do |t| + t.string "name" + t.string "admin_set_id" + t.integer "user_id" + t.string "frequency" + t.string "parser_klass" + t.integer "limit" + t.text "parser_fields" + t.text "field_mapping" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["user_id"], name: "index_bulkrax_importers_on_user_id" + end + + create_table "users", force: :cascade do |t| + t.string "email" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + +end diff --git a/spec/test_app/spec/models/user_spec.rb b/spec/test_app/spec/models/user_spec.rb new file mode 100644 index 00000000..47a31bb4 --- /dev/null +++ b/spec/test_app/spec/models/user_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe User, type: :model do + pending "add some examples to (or delete) #{__FILE__}" +end diff --git a/spec/views/bulkrax/importers/edit.html.erb_spec.rb b/spec/views/bulkrax/importers/edit.html.erb_spec.rb new file mode 100644 index 00000000..7964d9ac --- /dev/null +++ b/spec/views/bulkrax/importers/edit.html.erb_spec.rb @@ -0,0 +1,39 @@ +require 'rails_helper' + +RSpec.describe "importers/edit", type: :view do + before(:each) do + @importer = assign(:importer, Importer.create!( + :name => "MyString", + :admin_set_id => "MyString", + :user => nil, + :frequency => 
"MyString", + :parser_klass => "MyString", + :limit => 1, + :parser_fields => "", + :field_mapping => "" + )) + end + + it "renders the edit importer form" do + render + + assert_select "form[action=?][method=?]", importer_path(@importer), "post" do + + assert_select "input[name=?]", "importer[name]" + + assert_select "input[name=?]", "importer[admin_set_id]" + + assert_select "input[name=?]", "importer[user_id]" + + assert_select "input[name=?]", "importer[frequency]" + + assert_select "input[name=?]", "importer[parser_klass]" + + assert_select "input[name=?]", "importer[limit]" + + assert_select "input[name=?]", "importer[parser_fields]" + + assert_select "input[name=?]", "importer[field_mapping]" + end + end +end diff --git a/spec/views/bulkrax/importers/index.html.erb_spec.rb b/spec/views/bulkrax/importers/index.html.erb_spec.rb new file mode 100644 index 00000000..f2714dd9 --- /dev/null +++ b/spec/views/bulkrax/importers/index.html.erb_spec.rb @@ -0,0 +1,40 @@ +require 'rails_helper' + +RSpec.describe "importers/index", type: :view do + before(:each) do + assign(:importers, [ + Importer.create!( + :name => "Name", + :admin_set_id => "Admin Set", + :user => nil, + :frequency => "Frequency", + :parser_klass => "Parser Klass", + :limit => 2, + :parser_fields => "", + :field_mapping => "" + ), + Importer.create!( + :name => "Name", + :admin_set_id => "Admin Set", + :user => nil, + :frequency => "Frequency", + :parser_klass => "Parser Klass", + :limit => 2, + :parser_fields => "", + :field_mapping => "" + ) + ]) + end + + it "renders a list of importers" do + render + assert_select "tr>td", :text => "Name".to_s, :count => 2 + assert_select "tr>td", :text => "Admin Set".to_s, :count => 2 + assert_select "tr>td", :text => nil.to_s, :count => 2 + assert_select "tr>td", :text => "Frequency".to_s, :count => 2 + assert_select "tr>td", :text => "Parser Klass".to_s, :count => 2 + assert_select "tr>td", :text => 2.to_s, :count => 2 + assert_select "tr>td", :text => "".to_s, 
:count => 2 + assert_select "tr>td", :text => "".to_s, :count => 2 + end +end diff --git a/spec/views/bulkrax/importers/new.html.erb_spec.rb b/spec/views/bulkrax/importers/new.html.erb_spec.rb new file mode 100644 index 00000000..1873b30e --- /dev/null +++ b/spec/views/bulkrax/importers/new.html.erb_spec.rb @@ -0,0 +1,39 @@ +require 'rails_helper' + +RSpec.describe "importers/new", type: :view do + before(:each) do + assign(:importer, Importer.new( + :name => "MyString", + :admin_set_id => "MyString", + :user => nil, + :frequency => "MyString", + :parser_klass => "MyString", + :limit => 1, + :parser_fields => "", + :field_mapping => "" + )) + end + + it "renders new importer form" do + render + + assert_select "form[action=?][method=?]", importers_path, "post" do + + assert_select "input[name=?]", "importer[name]" + + assert_select "input[name=?]", "importer[admin_set_id]" + + assert_select "input[name=?]", "importer[user_id]" + + assert_select "input[name=?]", "importer[frequency]" + + assert_select "input[name=?]", "importer[parser_klass]" + + assert_select "input[name=?]", "importer[limit]" + + assert_select "input[name=?]", "importer[parser_fields]" + + assert_select "input[name=?]", "importer[field_mapping]" + end + end +end diff --git a/spec/views/bulkrax/importers/show.html.erb_spec.rb b/spec/views/bulkrax/importers/show.html.erb_spec.rb new file mode 100644 index 00000000..8de26acd --- /dev/null +++ b/spec/views/bulkrax/importers/show.html.erb_spec.rb @@ -0,0 +1,28 @@ +require 'rails_helper' + +RSpec.describe "importers/show", type: :view do + before(:each) do + @importer = assign(:importer, Importer.create!( + :name => "Name", + :admin_set_id => "Admin Set", + :user => nil, + :frequency => "Frequency", + :parser_klass => "Parser Klass", + :limit => 2, + :parser_fields => "", + :field_mapping => "" + )) + end + + it "renders attributes in <p>" do + render + expect(rendered).to match(/Name/) + expect(rendered).to match(/Admin Set/) + expect(rendered).to match(//) + expect(rendered).to match(/Frequency/) + expect(rendered).to match(/Parser Klass/) + expect(rendered).to match(/2/) + expect(rendered).to match(//) + expect(rendered).to match(//) + end +end