From ae3e014d06b50a874763f757a83659270ae1199b Mon Sep 17 00:00:00 2001 From: JojoVes Date: Thu, 8 Aug 2024 17:01:41 -0300 Subject: [PATCH 01/13] beginning of mads migration --- migrations/bceln_mads_to_term_person.yml | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 migrations/bceln_mads_to_term_person.yml diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml new file mode 100644 index 0000000..2d05e0e --- /dev/null +++ b/migrations/bceln_mads_to_term_person.yml @@ -0,0 +1,50 @@ +--- +id: bceln_mads_to_term_person +label: Create terms in the Person taxonomy. +migration_group: foxml_to_dgis +source: + plugin: dgi_migrate.source.migration + track_changes: true + migration: dgis_foxml_files + +destination: + plugin: entity:taxonomy_term + default_bundle: person + validate: true + +process: + _node_foxml_parsed: + - plugin: dgi_migrate.load_entity + source: fid + entity_type: entity:file + - plugin: dgi_migrate.method + method: getFileUri + - plugin: foxml.parse + _models: + - plugin: dgi_migrate.method + source: '@_node_foxml_parsed' + method: models + - plugin: skip_on_empty + method: row + status: + - plugin: dgi_migrate.subproperty + source: '@_node_foxml_parsed' + property: state + - plugin: static_map + map: + 'Active': 1 + 'Inactive': 0 + 'Deleted': 0 + name: + - plugin: default_value + default_value: 'testing' + +migration_dependencies: + required: + - dgis_foxml_files + +dependencies: + enforced: + module: + - dgi_migrate + - dgi_migrate_foxml_standard_mods From 6b2fde120f087d909c0fdc43ff283567314b7f04 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Thu, 8 Aug 2024 17:22:31 -0300 Subject: [PATCH 02/13] simple name mapping to test mads parsing pretty much just copy/pasted the mods process from these places in dgis_nodes and swapped 'mods' for 'mads': - https://github.com/discoverygarden/dgi_migrate/blob/9c2d9b7cfec4b213a7b08a69f98c28c185fea503/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml#L11-L19 - https://github.com/discoverygarden/dgi_migrate/blob/9c2d9b7cfec4b213a7b08a69f98c28c185fea503/modules/dgi_migrate_foxml_standard_mods/migrations/dgis_nodes.yml#L241-L269 I didn't get into the nested values yet, but this simple mapping in the 'name' field is just something I used to test that it is getting values from the MADS within the FOXML. --- migrations/bceln_mads_to_term_person.yml | 51 +++++++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 2d05e0e..2e721e6 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -6,6 +6,17 @@ source: plugin: dgi_migrate.source.migration track_changes: true migration: dgis_foxml_files + dsf_misc: + case_insensitive: &case_insensitive true + base_mads_node: &base_mads_node + plugin: dgi_migrate.process.xml.context_query + missing_behavior: skip_process + source: '@_mads_node' + xpath: '@_mads_xpath' + nested_mads_node: &nested_mads_node + plugin: dgi_migrate.process.xml.context_query + source: 'parent_value' + xpath: 'parent_row/dest/_mads_xpath' destination: plugin: entity:taxonomy_term @@ -26,6 +37,35 @@ process: method: models - plugin: skip_on_empty method: row + _mads_xpath: + - plugin: dgi_migrate.subindex + index: 'MADS' + source: '@_node_foxml_parsed' + missing_behavior: skip_process + - plugin: dgi_migrate.method + method: getUri + # XXX: An issue in the passing off of paths/URIs to libxml prevents the use + # of "dgi_migrate.process.xml.domfile" + - plugin: callback + callable: file_get_contents + - plugin: dgi_migrate.process.xml.domstring + missing_behavior: skip_process + - plugin: dgi_migrate.process.xml.xpath + namespaces: + mods: 'http://www.loc.gov/mads/v2' + xsi: 'http://www.w3.org/2001/XMLSchema-instance' + xlink: 'http://www.w3.org/1999/xlink' + _mads_node: + - plugin: skip_on_empty + method: process + source: '@_mads_xpath' + - plugin: dgi_migrate.method + method: query + args: + - '//mads:mads[1]' + - plugin: callback + callable: iterator_to_array + - plugin: array_shift status: - plugin: dgi_migrate.subproperty source: '@_node_foxml_parsed' @@ -36,8 +76,15 @@ process: 'Inactive': 0 'Deleted': 0 name: - - plugin: default_value - default_value: 'testing' + - << : *base_mads_node + query: 'mads:authority/mads:name' + - plugin: callback + callable: iterator_to_array + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + - plugin: single_value + - plugin: null_coalesce migration_dependencies: required: From 568a65e2381e7ced8b074b9feeafac20c655c539 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Fri, 9 Aug 2024 13:04:40 -0300 Subject: [PATCH 03/13] now correctly mapping name and preferred name And I added comments of what's left to help me pick this back up after the weekend. --- migrations/bceln_mads_to_term_person.yml | 109 +++++++++++++++++++++-- 1 file changed, 102 insertions(+), 7 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 2e721e6..e847b78 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -66,6 +66,69 @@ process: - plugin: callback callable: iterator_to_array - plugin: array_shift + _family_name: + - << : *base_mads_node + query: 'mads:authority/mads:name' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: row + - plugin: multiple_values + - plugin: dgi_migrate.sub_process + process_values: true + values: + _family: + - << : *nested_mads_node + query: 'normalize-space(mads:namePart[@type="family"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + - plugin: skip_on_empty + method: process + - plugin: dgi_migrate.process.single_extract + index: [ _family ] + _given_name: + - << : *base_mads_node + query: 'mads:authority/mads:name' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: row + - plugin: multiple_values + - plugin: dgi_migrate.sub_process + process_values: true + values: + _given: + - << : *nested_mads_node + query: 'normalize-space(mads:namePart[@type="given"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + - plugin: skip_on_empty + method: process + - plugin: dgi_migrate.process.single_extract + index: [ _given ] + _name_date: + - << : *base_mads_node + query: 'mads:authority/mads:name' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: row + - plugin: multiple_values + - plugin: dgi_migrate.sub_process + process_values: true + values: + _date: + - << : *nested_mads_node + query: 'normalize-space(mads:namePart[@type="date"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + - plugin: skip_on_empty + method: process + - plugin: dgi_migrate.process.single_extract + index: [ _date ] status: - plugin: dgi_migrate.subproperty source: '@_node_foxml_parsed' @@ -76,15 +139,47 @@ process: 'Inactive': 0 'Deleted': 0 name: - - << : *base_mads_node - query: 'mads:authority/mads:name' + - plugin: get + source: + - '@_family_name' + - '@_given_name' + - '@_name_date' + - plugin: flatten - plugin: callback - callable: iterator_to_array - - plugin: multiple_values - - plugin: dgi_migrate.subproperty - property: nodeValue - - plugin: single_value + callable: array_filter + - plugin: concat + delimiter: ', ' + - plugin: skip_on_empty + method: row + # TODO: add 'tid' if a term with the same 'name' value exists. + field_person_preferred_name/family: + - plugin: get + source: '@_family_name' + - plugin: skip_on_empty + method: process + - plugin: flatten + - plugin: null_coalesce + field_person_preferred_name/given: + - plugin: get + source: '@_given_name' + - plugin: skip_on_empty + method: process + - plugin: flatten - plugin: null_coalesce + # TODO: add field_person_email_contact + # TODO: add field_phone + # TODO: add field_person_department (entity reference to term in vocab: discipline) + # TODO: add field_position + # TODO: add field_affiliation_date (EDTF date range) + # TODO: add field_description (two possible mapping sources) + # TODO: add field_address + # TODO: add field_status + # TODO: add field_orcid + # TODO: add field_field_of_activity + # TODO: add field_identifier_other + # TODO: add field_person_alternate_names/family + # TODO: add field_person_alternate_names/given + # TODO: add field_photograph (entity reference to Image Media) migration_dependencies: required: From 47a95615aac940d447206b41ded8c1dda9a1fef7 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Mon, 12 Aug 2024 11:01:30 -0300 Subject: [PATCH 04/13] simplify some processing --- migrations/bceln_mads_to_term_person.yml | 62 +++--------------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index e847b78..9c9d333 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -68,67 +68,22 @@ process: - plugin: array_shift _family_name: - << : *base_mads_node - query: 'mads:authority/mads:name' - - plugin: callback - callable: iterator_to_array - - plugin: skip_on_empty - method: row - - plugin: multiple_values - - plugin: dgi_migrate.sub_process - process_values: true - values: - _family: - - << : *nested_mads_node - query: 'normalize-space(mads:namePart[@type="family"][normalize-space()][1])' - method: evaluate - - plugin: skip_on_empty - method: process + query: 'normalize-space(mads:authority/mads:name/mads:namePart[@type="family"][normalize-space()][1])' + method: evaluate - plugin: skip_on_empty method: process - - plugin: dgi_migrate.process.single_extract - index: [ _family ] _given_name: - << : *base_mads_node - query: 'mads:authority/mads:name' - - plugin: callback - callable: iterator_to_array - - plugin: skip_on_empty - method: row - - plugin: multiple_values - - plugin: dgi_migrate.sub_process - process_values: true - values: - _given: - - << : *nested_mads_node - query: 'normalize-space(mads:namePart[@type="given"][normalize-space()][1])' - method: evaluate - - plugin: skip_on_empty - method: process + query: 'normalize-space(mads:authority/mads:name/mads:namePart[@type="given"][normalize-space()][1])' + method: evaluate - plugin: skip_on_empty method: process - - plugin: dgi_migrate.process.single_extract - index: [ _given ] _name_date: - << : *base_mads_node - query: 'mads:authority/mads:name' - - plugin: callback - callable: iterator_to_array - - plugin: skip_on_empty - method: row - - plugin: multiple_values - - plugin: dgi_migrate.sub_process - process_values: true - values: - _date: - - << : *nested_mads_node - query: 'normalize-space(mads:namePart[@type="date"][normalize-space()][1])' - method: evaluate - - plugin: skip_on_empty - method: process + query: 'normalize-space(mads:authority/mads:name/mads:namePart[@type="date"][normalize-space()][1])' + method: evaluate - plugin: skip_on_empty method: process - - plugin: dgi_migrate.process.single_extract - index: [ _date ] status: - plugin: dgi_migrate.subproperty source: '@_node_foxml_parsed' @@ -144,7 +99,6 @@ process: - '@_family_name' - '@_given_name' - '@_name_date' - - plugin: flatten - plugin: callback callable: array_filter - plugin: concat @@ -157,15 +111,11 @@ process: source: '@_family_name' - plugin: skip_on_empty method: process - - plugin: flatten - - plugin: null_coalesce field_person_preferred_name/given: - plugin: get source: '@_given_name' - plugin: skip_on_empty method: process - - plugin: flatten - - plugin: null_coalesce # TODO: add field_person_email_contact # TODO: add field_phone # TODO: add field_person_department (entity reference to term in vocab: discipline) From 7dcf6bb0d3e6a862f647c296d1593ddc38efb4ec Mon Sep 17 00:00:00 2001 From: JojoVes Date: Mon, 12 Aug 2024 12:26:27 -0300 Subject: [PATCH 05/13] add tid processing to find pre-existing terms --- migrations/bceln_mads_to_term_person.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 9c9d333..3439d3b 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -105,7 +105,17 @@ process: delimiter: ', ' - plugin: skip_on_empty method: row - # TODO: add 'tid' if a term with the same 'name' value exists. + tid: + - plugin: dgi_migrate.process.entity_query + entity_type: entity:taxonomy_term + static_conditions: + - [vid, person] + conditions: + - [name, '@name'] + - plugin: skip_on_empty + method: process + - plugin: flatten + - plugin: null_coalesce field_person_preferred_name/family: - plugin: get source: '@_family_name' From bd20d06a8a41c500f6d4e2df7fab4beb0f6ecff3 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Mon, 12 Aug 2024 18:12:32 -0300 Subject: [PATCH 06/13] more progress mapping more of the fields --- migrations/bceln_mads_to_term_person.yml | 197 +++++++++++++++++++++-- 1 file changed, 185 insertions(+), 12 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 3439d3b..1ec298d 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -17,6 +17,36 @@ source: plugin: dgi_migrate.process.xml.context_query source: 'parent_value' xpath: 'parent_row/dest/_mads_xpath' + generic_term: + after: &generic_term_after + _auth_value_uri: + - << : *nested_mads_node + query: 'string(@valueURI)' + method: evaluate + _auth_source: + - << : *nested_mads_node + query: 'string(@authority)' + method: evaluate + _value: + - << : *nested_mads_node + query: 'normalize-space(.)' + method: evaluate + actual: + - plugin: get + source: + - '@_auth_source' + - '@_auth_value_uri' + - '@_value' + # XXX: Needs to be provided wherever this is used, corresponding + # to the vocab in which to do the things. + - '@_vid' + - plugin: flatten + - plugin: migration_lookup + migration: dgis_stub_terms_generic + stub_id: dgis_stub_terms_generic + extract: &generic_term_extract + plugin: dgi_migrate.process.single_extract + index: [actual] destination: plugin: entity:taxonomy_term @@ -52,7 +82,7 @@ process: missing_behavior: skip_process - plugin: dgi_migrate.process.xml.xpath namespaces: - mods: 'http://www.loc.gov/mads/v2' + mads: 'http://www.loc.gov/mads/v2' xsi: 'http://www.w3.org/2001/XMLSchema-instance' xlink: 'http://www.w3.org/1999/xlink' _mads_node: @@ -126,24 +156,167 @@ process: source: '@_given_name' - plugin: skip_on_empty method: process - # TODO: add field_person_email_contact - # TODO: add field_phone - # TODO: add field_person_department (entity reference to term in vocab: discipline) - # TODO: add field_position - # TODO: add field_affiliation_date (EDTF date range) - # TODO: add field_description (two possible mapping sources) - # TODO: add field_address - # TODO: add field_status - # TODO: add field_orcid - # TODO: add field_field_of_activity - # TODO: add field_identifier_other + field_position: + - << : *base_mads_node + query: 'mads:affiliation/mads:position' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + field_status: + - << : *base_mads_node + query: 'mads:note[@type="status"]' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + - plugin: single_value + - plugin: null_coalesce + field_field_of_activity: + - << : *base_mads_node + query: 'mads:fieldOfActivity' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + field_identifier_other: + - << : *base_mads_node + query: 'mads:identifier[@type=u1]' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + field_person_department: + - <<: *base_mads_node + query: 'mads:affiliation/mads:organization' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.sub_process + process_values: true + values: + _auth_value_uri: + - plugin: default_value + default_value: '' + _auth_source: + - plugin: default_value + default_value: '' + _vid: + - plugin: default_value + default_value: discipline + <<: *generic_term_after + - <<: *generic_term_extract + _field_affiliation_date_start: + - << : *base_mads_node + query: 'normalize-space(mads:affiliation/mads:dateValid[@point="start"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + _field_affiliation_date_end: + - << : *base_mads_node + query: 'normalize-space(mads:affiliation/mads:dateValid[@point="end"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + # XXX: this should probably be adjusted to throw a message about the invalid date. + field_affiliation_date: + - plugin: get + source: + - '@_field_affiliation_date_start' + - '@_field_affiliation_date_end' + - plugin: skip_on_empty + method: process + - plugin: concat + delimiter: '/' + - plugin: dgi_migrate_edtf_validator + missing_behavior: skip_process + _mads_note_type_history: + - << : *base_mads_node + query: 'normalize-space(mads:note[@type="history"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + _mads_note_orcid: + - << : *base_mads_node + query: 'normalize-space(mads:note[not(@type) and starts-with(text(), "https://orcid.org")])' + method: evaluate + - plugin: skip_on_empty + method: process + _mads_note_not_orcid: + - << : *base_mads_node + query: 'normalize-space(mads:note[not(@type) and not(starts-with(text(), "https://orcid.org"))])' + method: evaluate + - plugin: skip_on_empty + method: process + field_description: + - plugin: get + source: + - '@_mads_note_type_history' + - '@_mads_note_not_orcid' + - plugin: skip_on_empty + method: process + - plugin: callback + callable: array_filter + - plugin: concat + delimiter: ' ' + field_orcid: + - plugin: get + source: '@_mads_note_orcid' + - plugin: skip_on_empty + method: process # TODO: add field_person_alternate_names/family # TODO: add field_person_alternate_names/given # TODO: add field_photograph (entity reference to Image Media) + # Cannot test these until more complete test foxml is made: + field_person_email_contact: + - << : *base_mads_node + query: 'mads:affiliation/mads:email' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + field_phone: + - << : *base_mads_node + query: 'mads:affiliation/mads:phone' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + field_address: + - << : *base_mads_node + query: 'mads:note[@type="address"]' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue migration_dependencies: required: - dgis_foxml_files + - dgis_stub_terms_generic dependencies: enforced: From ed87c88bce1009fad3bb0f1b0a55ef4f803591d6 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Tue, 13 Aug 2024 09:55:53 -0300 Subject: [PATCH 07/13] add note about field_website this mapping wasn't in the ticket description but it's in the mapping spreadsheet --- migrations/bceln_mads_to_term_person.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 1ec298d..002252a 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -278,6 +278,7 @@ process: source: '@_mads_note_orcid' - plugin: skip_on_empty method: process + # TODO: add field_website # TODO: add field_person_alternate_names/family # TODO: add field_person_alternate_names/given # TODO: add field_photograph (entity reference to Image Media) From fecb6399e17701091209362fd5ee5de45bc1d605 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Tue, 13 Aug 2024 15:14:47 -0300 Subject: [PATCH 08/13] more fields mapped website is simple, the alternate names are mapped simply, at least for now, but might not handle multiple alternate names (not sure if that's necessary at the moment?) --- migrations/bceln_mads_to_term_person.yml | 27 +++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 002252a..79081d0 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -278,9 +278,30 @@ process: source: '@_mads_note_orcid' - plugin: skip_on_empty method: process - # TODO: add field_website - # TODO: add field_person_alternate_names/family - # TODO: add field_person_alternate_names/given + field_website: + - << : *base_mads_node + query: 'mads:url' + - plugin: callback + callable: iterator_to_array + - plugin: skip_on_empty + method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue + # XXX: Unsure how this might handle multiple field_person_alternate_names, or + # if it even needs to. + field_person_alternate_names/family: + - << : *base_mads_node + query: 'normalize-space(mads:variant/mads:name/mads:namePart[@type="family"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process + field_person_alternate_names/given: + - << : *base_mads_node + query: 'normalize-space(mads:variant/mads:name/mads:namePart[@type="given"][normalize-space()][1])' + method: evaluate + - plugin: skip_on_empty + method: process # TODO: add field_photograph (entity reference to Image Media) # Cannot test these until more complete test foxml is made: field_person_email_contact: From 899097be047b37474d94fe9fd367f66f23ee7f38 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Wed, 14 Aug 2024 14:36:39 -0300 Subject: [PATCH 09/13] stashing work in progress Currently getting an error about the media from bceln_person_tn_media not being able to access the file created by bceln_person_tn_file, but it works fine if accessing a pre-existing file. Committing the current messy state of things to share more easily with the team to get help. --- migrations/bceln_mads_to_term_person.yml | 20 ++-- migrations/bceln_person_tn_file.yml | 121 +++++++++++++++++++++++ migrations/bceln_person_tn_media.yml | 104 +++++++++++++++++++ 3 files changed, 238 insertions(+), 7 deletions(-) create mode 100644 migrations/bceln_person_tn_file.yml create mode 100644 migrations/bceln_person_tn_media.yml diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 79081d0..16884ce 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -71,7 +71,7 @@ process: - plugin: dgi_migrate.subindex index: 'MADS' source: '@_node_foxml_parsed' - missing_behavior: skip_process + missing_behavior: skip_row - plugin: dgi_migrate.method method: getUri # XXX: An issue in the passing off of paths/URIs to libxml prevents the use @@ -79,7 +79,7 @@ process: - plugin: callback callable: file_get_contents - plugin: dgi_migrate.process.xml.domstring - missing_behavior: skip_process + missing_behavior: skip_row - plugin: dgi_migrate.process.xml.xpath namespaces: mads: 'http://www.loc.gov/mads/v2' @@ -87,7 +87,7 @@ process: xlink: 'http://www.w3.org/1999/xlink' _mads_node: - plugin: skip_on_empty - method: process + method: row source: '@_mads_xpath' - plugin: dgi_migrate.method method: query @@ -232,7 +232,6 @@ process: method: evaluate - plugin: skip_on_empty method: process - # XXX: this should probably be adjusted to throw a message about the invalid date. field_affiliation_date: - plugin: get source: @@ -243,7 +242,7 @@ process: - plugin: concat delimiter: '/' - plugin: dgi_migrate_edtf_validator - missing_behavior: skip_process + missing_behavior: skip_process # TODO: change this to skip_row; just keeping as skip_process to facilitate development with the current sample FOXML I have. _mads_note_type_history: - << : *base_mads_node query: 'normalize-space(mads:note[@type="history"][normalize-space()][1])' @@ -302,7 +301,14 @@ process: method: evaluate - plugin: skip_on_empty method: process - # TODO: add field_photograph (entity reference to Image Media) + field_photograph/0/target_id: + - plugin: migration_lookup + migration: bceln_person_tn_media + source: fid + no_stub: true + - plugin: skip_on_empty + method: process + # Cannot test these until more complete test foxml is made: field_person_email_contact: - << : *base_mads_node @@ -339,7 +345,7 @@ migration_dependencies: required: - dgis_foxml_files - dgis_stub_terms_generic - + - bceln_person_tn_media dependencies: enforced: module: diff --git a/migrations/bceln_person_tn_file.yml b/migrations/bceln_person_tn_file.yml new file mode 100644 index 0000000..4b9e1f6 --- /dev/null +++ b/migrations/bceln_person_tn_file.yml @@ -0,0 +1,121 @@ +--- +id: bceln_person_tn_file +label: Create file entities from TNs in Person FOXML +migration_group: foxml_to_dgis +source: + plugin: dgi_migrate.source.migration + track_changes: true + migration: dgis_foxml_files + constants: + file_dest: 'repo-bin:/' + valid_models: + - 'info:fedora/islandora:personCModel' +destination: + plugin: entity:file + validate: true +process: + _parsed: + - plugin: dgi_migrate.load_entity + source: fid + entity_type: entity:file + - plugin: dgi_migrate.method + method: getFileUri + - plugin: foxml.parse + _models: + - plugin: dgi_migrate.method + source: '@_parsed' + method: models + - plugin: skip_on_empty + method: row + _valid_present_models: + - plugin: callback + callable: array_intersect + unpack_source: true + source: + - constants/valid_models + - '@_models' + - plugin: skip_on_empty + method: row + _source_dsid: + - plugin: static_map + source: '@_valid_present_models' + bypass: false + map: + 'info:fedora/islandora:personCModel': 'TN' + - plugin: extract + index: [0] + _latest: + - plugin: dgi_migrate.subindex + source: '@_parsed' + index_from_destination: _source_dsid + skip_row_if_missing: true + - plugin: dgi_migrate.method + method: latest + created: + - plugin: dgi_migrate.subproperty + source: '@_latest' + property: CREATED + - plugin: callback + callable: strtotime + _source_uri: + - plugin: dgi_migrate.method + source: '@_latest' + method: getUri + _path: + - plugin: format_date + source: '@created' + from_format: U + to_format: 'Y-m' + filemime: + - plugin: dgi_migrate.subproperty + property: MIMETYPE + source: '@_latest' + _ext: + plugin: dgi_migrate.process.extension_from_mimetype + source: '@filemime' + _safe_pid: + - plugin: dgi_migrate.subproperty + source: '@_parsed' + property: PID + - plugin: machine_name + filename: + - plugin: concat + source: + - '@_safe_pid' + - '@_ext' + delimiter: '.' + _dest_uri: + - plugin: concat + source: + - constants/file_dest + - '@_path' + - '@filename' + delimiter: '/' + uri: + - plugin: dgi_migrate.naive_file_copy + file_exists: rename + source: + - '@_source_uri' + - '@_dest_uri' + filesize: + - plugin: callback + source: '@uri' + callable: filesize + - plugin: skip_on_value + method: row + value: 0 + message: 'Thumbnail file has a filesize of 0' + status: + - plugin: default_value + default_value: 1 + uid: + - plugin: default_value + source: shared/default_uid + default_value: 0 +migration_dependencies: + required: + - dgis_foxml_files +dependencies: + enforced: + module: + - dgi_migrate_foxml_standard_mods diff --git a/migrations/bceln_person_tn_media.yml b/migrations/bceln_person_tn_media.yml new file mode 100644 index 0000000..1b9517f --- /dev/null +++ b/migrations/bceln_person_tn_media.yml @@ -0,0 +1,104 @@ +--- +id: bceln_person_tn_media +label: Create thumbnail media entities from Person FOXML +migration_group: foxml_to_dgis +source: + plugin: dgi_migrate.source.migration + track_changes: true + migration: dgis_foxml_files + constants: + valid_models: + - 'info:fedora/islandora:personCModel' +destination: + plugin: entity:media + default_bundle: image + validate: true +process: + _file_id: + - plugin: migration_lookup + migration: bceln_person_tn_file + source: fid + no_stub: true + - plugin: skip_on_empty + method: row + _file: + plugin: dgi_migrate.load_entity + source: '@_file_id' + entity_type: entity:file + _parsed: + - plugin: dgi_migrate.load_entity + source: fid + entity_type: entity:file + - plugin: dgi_migrate.method + method: getFileUri + - plugin: foxml.parse + _models: + - plugin: dgi_migrate.method + source: '@_parsed' + method: models + - plugin: skip_on_empty + method: row + _valid_present_models: + - plugin: callback + callable: array_intersect + unpack_source: true + source: + - constants/valid_models + - '@_models' + - plugin: skip_on_empty + method: row + name: + - plugin: dgi_migrate.subproperty + property: label + source: '@_parsed' + field_media_image/target_id: + - plugin: get + source: '@_file_id' + # XXX: To see that this does work with pre-existing files, you can swap the + # above with this (and a default value that matches some existing file in + # your local test env). + #- plugin: default_value + # default_value: 59 + field_media_image/alt: '@name' + bundle: + - plugin: static_map + source: '@_valid_present_models' + bypass: false + map: + 'info:fedora/islandora:personCModel': 'image' + - plugin: extract + index: [0] + field_file_size: + - plugin: dgi_migrate.method + method: getSize + source: '@_file' + field_media_use: + - plugin: default_value + default_value: http://pcdm.org/use#ThumbnailImage + - plugin: entity_lookup + bundle_key: vid + bundle: islandora_media_use + value_key: field_external_uri + entity_type: taxonomy_term + # XXX: migrate_plus's case comparison makes assumptions about the entity's + # "main" property... we want "uri", but it assumes "value". + ignore_case: true + field_mime_type: + - plugin: dgi_migrate.method + method: getMimeType + source: '@_file' + status: + - plugin: default_value + default_value: 1 + uid: + - plugin: dgi_migrate.method + method: getOwnerId + source: '@_file' +migration_dependencies: + required: + - dgis_foxml_files + - bceln_person_tn_file +dependencies: + enforced: + module: + - dgi_migrate_foxml_standard_mods From 95824c81d497555c80fffc4902ae238914f8bcd6 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Wed, 14 Aug 2024 15:12:01 -0300 Subject: [PATCH 10/13] keep line-breaks in description I just noticed the space normalization removed expected formatting from the description value and this is a quick adjustment to address that. --- migrations/bceln_mads_to_term_person.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 16884ce..5de8175 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -245,10 +245,14 @@ process: missing_behavior: skip_process # TODO: change this to skip_row; just keeping as skip_process to facilitate development with the current sample FOXML I have. _mads_note_type_history: - << : *base_mads_node - query: 'normalize-space(mads:note[@type="history"][normalize-space()][1])' - method: evaluate + query: 'mads:note[@type="history"]' + - plugin: callback + callable: iterator_to_array - plugin: skip_on_empty method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue _mads_note_orcid: - << : *base_mads_node query: 'normalize-space(mads:note[not(@type) and starts-with(text(), "https://orcid.org")])' @@ -257,10 +261,14 @@ process: method: process _mads_note_not_orcid: - << : *base_mads_node - query: 'normalize-space(mads:note[not(@type) and not(starts-with(text(), "https://orcid.org"))])' - method: evaluate + query: 'mads:note[not(@type) and not(starts-with(text(), "https://orcid.org"))]' + - plugin: callback + callable: iterator_to_array - plugin: skip_on_empty method: process + - plugin: multiple_values + - plugin: dgi_migrate.subproperty + property: nodeValue field_description: - plugin: get source: @@ -268,10 +276,9 @@ process: - '@_mads_note_not_orcid' - plugin: skip_on_empty method: process + - plugin: flatten - plugin: callback callable: array_filter - - plugin: concat - delimiter: ' ' field_orcid: - plugin: get source: '@_mads_note_orcid' From 0565746e5675bcb3d964ce73eed3a817a696092d Mon Sep 17 00:00:00 2001 From: JojoVes Date: Thu, 15 Aug 2024 10:36:08 -0300 Subject: [PATCH 11/13] clean up some loose ends --- migrations/bceln_mads_to_term_person.yml | 10 +--------- migrations/bceln_person_tn_media.yml | 5 ----- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 5de8175..83abddc 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -61,12 +61,6 @@ process: - plugin: dgi_migrate.method method: getFileUri - plugin: foxml.parse - _models: - - plugin: dgi_migrate.method - source: '@_node_foxml_parsed' - method: models - - plugin: skip_on_empty - method: row _mads_xpath: - plugin: dgi_migrate.subindex index: 'MADS' @@ -242,7 +236,7 @@ process: - plugin: concat delimiter: '/' - plugin: dgi_migrate_edtf_validator - missing_behavior: skip_process # TODO: change this to skip_row; just keeping as skip_process to facilitate development with the current sample FOXML I have. + missing_behavior: skip_row _mads_note_type_history: - << : *base_mads_node query: 'mads:note[@type="history"]' @@ -315,8 +309,6 @@ process: no_stub: true - plugin: skip_on_empty method: process - - # Cannot test these until more complete test foxml is made: field_person_email_contact: - << : *base_mads_node query: 'mads:affiliation/mads:email' diff --git a/migrations/bceln_person_tn_media.yml b/migrations/bceln_person_tn_media.yml index 1b9517f..56af9d8 100644 --- a/migrations/bceln_person_tn_media.yml +++ b/migrations/bceln_person_tn_media.yml @@ -54,11 +54,6 @@ process: field_media_image/target_id: - plugin: get source: '@_file_id' - # XXX: To see that this does work with pre-existing files, you can swap the - # above with this (and a default value that matches some existing file in - # your local test env). - #- plugin: default_value - # default_value: 59 field_media_image/alt: '@name' bundle: - plugin: static_map From 076961a41b15bf7dd67ed89ca7eb03f7cfc0e429 Mon Sep 17 00:00:00 2001 From: JojoVes Date: Thu, 15 Aug 2024 10:42:46 -0300 Subject: [PATCH 12/13] include migration dependency on edtf validator --- migrations/bceln_mads_to_term_person.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 83abddc..519bbf3 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -350,3 +350,4 @@ dependencies: module: - dgi_migrate - dgi_migrate_foxml_standard_mods + - dgi_migrate_edtf_validator From c1b9d751779bbf633587e21008e4b935f3eb11a1 Mon Sep 17 00:00:00 2001 From: Chris MacDonald <31731869+chrismacdonaldw@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:46:58 -0300 Subject: [PATCH 13/13] Add missing quotations --- migrations/bceln_mads_to_term_person.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrations/bceln_mads_to_term_person.yml b/migrations/bceln_mads_to_term_person.yml index 519bbf3..2e89242 100644 --- a/migrations/bceln_mads_to_term_person.yml +++ b/migrations/bceln_mads_to_term_person.yml @@ -184,7 +184,7 @@ process: property: nodeValue field_identifier_other: - << : *base_mads_node - query: 'mads:identifier[@type=u1]' + query: 'mads:identifier[@type="u1"]' - plugin: callback callable: iterator_to_array - plugin: skip_on_empty