Skip to content

Commit

Permalink
Merge pull request #42 from OpenGov-OpenData/jguo144/2024-09-04/update-field-mapping
Browse files Browse the repository at this point in the history

Update field mapping to package extras
  • Loading branch information
scastineyras authored Sep 5, 2024
2 parents 78b4bcf + 7d6adea commit 0b85caa
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 10 deletions.
37 changes: 27 additions & 10 deletions ckanext/dcat/configuration_processors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from builtins import str
from past.builtins import basestring
import re
import json

Expand Down Expand Up @@ -123,7 +122,7 @@ def check_config(config_obj):
if 'default_groups' in config_obj:
if not isinstance(config_obj['default_groups'], list):
raise ValueError('default_groups must be a *list* of group names/ids')
if config_obj['default_groups'] and not isinstance(config_obj['default_groups'][0], basestring):
if config_obj['default_groups'] and not isinstance(config_obj['default_groups'][0], str):
raise ValueError('default_groups must be a list of group names/ids (i.e. strings)')

# Check if default groups exist
Expand Down Expand Up @@ -159,6 +158,9 @@ def check_config(config_obj):
if 'default_extras' in config_obj:
if not isinstance(config_obj['default_extras'], dict):
raise ValueError('default_extras must be a dictionary')
for key in config_obj.get('default_extras'):
if key == 'guid':
raise ValueError('default_extras cannot be used to modify harvest guid')

@staticmethod
def modify_package_dict(package_dict, config, dcat_dict):
Expand Down Expand Up @@ -193,9 +195,9 @@ def check_config(config_obj):
if 'default_values' in config_obj:
if not isinstance(config_obj['default_values'], list):
raise ValueError('default_values must be a *list* of dictionaries')
if config_obj['default_values'] and not isinstance(config_obj['default_values'][0], dict):
raise ValueError('default_values must be a *list* of dictionaries')
for default_field in config_obj.get('default_values', []):
if not isinstance(default_field, dict):
raise ValueError('default_values must be a *list* of dictionaries')
for key in default_field:
if key in ['id', 'name']:
raise ValueError('default_values cannot be used to modify dataset id/name')
Expand Down Expand Up @@ -223,11 +225,19 @@ def check_config(config_obj):
if 'map_fields' in config_obj:
if not isinstance(config_obj['map_fields'], list):
raise ValueError('map_fields must be a *list* of dictionaries')
if config_obj['map_fields'] and not isinstance(config_obj['map_fields'][0], dict):
raise ValueError('map_fields must be a *list* of dictionaries')
for map_field in config_obj.get('map_fields', []):
if not isinstance(map_field, dict):
raise ValueError('map_fields must be a *list* of dictionaries')
if not map_field.get('source'):
raise ValueError('map_fields must have a source field')
if not map_field.get('target'):
raise ValueError('map_fields must have a target field')
if map_field.get('target', '') in ['id', 'name']:
raise ValueError('map_fields cannot be used to modify dataset id/name')
if map_field.get('extras') and not isinstance(map_field.get('extras'), bool):
raise ValueError('map_fields extras must be boolean')
if map_field.get('extras') and map_field.get('target') == 'guid':
raise ValueError('map_fields extras cannot be used to modify harvest guid')

@staticmethod
def modify_package_dict(package_dict, config, dcat_dict):
Expand Down Expand Up @@ -270,14 +280,21 @@ def modify_package_dict(package_dict, config, dcat_dict):
'%Y-%m-%dT%H:%M:%S.%fZ'
).strftime('%H:%M:%S.%fZ')

# Map value to dataset field
package_dict[target_field] = value
if not 'extras' in package_dict:
package_dict['extras'] = []

# Remove from extras any keys present in the config
existing_extra = get_extra(target_field, package_dict)
if existing_extra:
package_dict['extras'].remove(existing_extra)

if map_field.get('extras', False):
# Map value to extras
package_dict['extras'].append({'key': target_field, 'value': value})
else:
# Map value to dataset field
package_dict[target_field] = value


class CompositeMapping(BaseConfigProcessor):

Expand All @@ -286,14 +303,14 @@ def check_config(config_obj):
if 'composite_field_mapping' in config_obj:
if not isinstance(config_obj['composite_field_mapping'], list):
raise ValueError('composite_field_mapping must be a *list* of dictionaries')
if config_obj['composite_field_mapping'] and not isinstance(config_obj['composite_field_mapping'][0], dict):
raise ValueError('composite_field_mapping must be a *list* of dictionaries')
try:
schema_result = get_action('scheming_dataset_schema_show')({}, {'type': 'dataset'})
dataset_schema = schema_result.get('dataset_fields')
except:
pass
for composite_map in config_obj.get('composite_field_mapping', []):
if not isinstance(composite_map, dict):
raise ValueError('composite_field_mapping must be a *list* of dictionaries')
field_found = False
field_name = list(composite_map)[0]
for dataset_field in dataset_schema:
Expand Down
26 changes: 26 additions & 0 deletions ckanext/dcat/tests/test_configuration_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,32 @@ def test_modify_package_mapping_values_with_modified_time(self):

assert package["modified_time"] == "11:16:25.000000Z"

def test_modify_package_mapping_values_with_extras(self):
    """A ``map_fields`` rule flagged with ``extras`` is written to package extras.

    The DCAT dict carries a "language" value ("Spanish"), so that value is
    expected in the resulting extra rather than the rule's default
    ("English").
    """
    # Single mapping rule sending the DCAT "language" value to an extra.
    language_rule = {
        "source": "language",
        "target": "language",
        "default": "English",
        "extras": True
    }
    harvest_config = {"map_fields": [language_rule]}

    source_dataset = {
        "title": "Test Dataset",
        "name": "test-dataset",
        "language": "Spanish"
    }
    # The package starts out without any "extras" key.
    target_package = {"title": "Test Dataset", "name": "test-dataset"}

    self.processor.modify_package_dict(target_package, harvest_config, source_dataset)

    first_extra = target_package["extras"][0]
    assert first_extra["key"] == "language"
    assert first_extra["value"] == "Spanish"


class TestCompositeMapping:

Expand Down

0 comments on commit 0b85caa

Please sign in to comment.