From 7ea1b7cf9b8c3c732c3387955f4c14396d70d1f5 Mon Sep 17 00:00:00 2001 From: devchima Date: Mon, 17 Feb 2025 05:48:03 -0500 Subject: [PATCH 01/11] CMS Forms Flexible Imports --- home/import_assessments.py | 82 ++++++++++++++---- .../assessment_missing_related_page.csv | 8 +- .../assessments_missing_generic_error.csv | 6 ++ .../assessments_missing_locale.csv | 6 ++ .../assessments_missing_title.csv | 6 ++ .../assessments_multiple_missing_headers.csv | 6 ++ .../import-export-data/broken_assessment.csv | 2 +- .../snake_case_assessments.csv | 5 ++ home/tests/test_assessment_import_export.py | 86 ++++++++++++++++++- 9 files changed, 183 insertions(+), 24 deletions(-) create mode 100644 home/tests/import-export-data/assessments_missing_generic_error.csv create mode 100644 home/tests/import-export-data/assessments_missing_locale.csv create mode 100644 home/tests/import-export-data/assessments_missing_title.csv create mode 100644 home/tests/import-export-data/assessments_multiple_missing_headers.csv create mode 100644 home/tests/import-export-data/snake_case_assessments.csv diff --git a/home/import_assessments.py b/home/import_assessments.py index 00fedccf..3cb2b61c 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -1,5 +1,6 @@ import contextlib import csv +import re from dataclasses import dataclass, field, fields from queue import Queue from typing import Any @@ -15,6 +16,7 @@ from home.models import Assessment, ContentPage, HomePage # type: ignore AssessmentId = tuple[str, Locale] +MANDATORY_HEADERS = ["title", "question", "slug", "generic_error", "locale"] class ImportAssessmentException(Exception): @@ -96,9 +98,32 @@ def save_assessments(self) -> None: ) def parse_file(self) -> list["AssessmentRow"]: + """ + Converts the iterator to a list of rows. + If there are rows: + a. Extracts the original headers from the first row. + b. Converts the original headers to snake_case and creates a mapping from original headers to snake_case headers. + c. Validates that the snake_case headers contain all mandatory headers. + d. Transforms each row to use snake_case headers. + """ + row_iterator = parse_file(self.file_content, self.file_type) + rows = [row for _, row in row_iterator] + + if rows: + original_headers = rows[0].keys() + headers_mapping = { + header: self.to_snake_case(header) for header in original_headers + } + snake_case_headers = list(headers_mapping.values()) + self.validate_headers(snake_case_headers, MANDATORY_HEADERS, row_num=1) + transformed_rows = [ + {headers_mapping[key]: value for key, value in row.items()} + for row in rows + ] + return [ - AssessmentRow.from_flat(row, i) - for i, row in parse_file(self.file_content, self.file_type) + AssessmentRow.from_flat(row, i + 2) + for i, row in enumerate(transformed_rows) ] def set_progress(self, message: str, progress: int) -> None: @@ -185,6 +210,21 @@ def create_shadow_assessment_from_row( ) assessment.questions.append(question) + def validate_headers( + self, headers: list[str], MANDATORY_HEADERS: list[str], row_num: int + ) -> None: + missing_headers = [ + header for header in MANDATORY_HEADERS if header not in headers + ] + if missing_headers: + raise ImportAssessmentException( + message=f"Missing mandatory headers: {', '.join(missing_headers)}", + row_num=row_num, + ) + + def to_snake_case(self, s: str) -> str: + return re.sub(r"[\W_]+", "_", s).lower().strip("_") + # Since wagtail page models are so difficult to create and modify programatically, # we instead store all the pages as these shadow objects, which we can then at the end @@ -360,6 +400,18 @@ class AssessmentRow: def fields(cls) -> list[str]: return [field.name for field in fields(cls)] + @classmethod + def check_missing_fields(cls, row: dict[str, str], row_num: int) -> None: + """ + Checks for missing required fields in the row and raises an exception if any is missing. + """ + missing_fields = [field for field in MANDATORY_HEADERS if field not in row] + if missing_fields: + raise ImportAssessmentException( + f"The import file is missing required fields: {', '.join(missing_fields)}", + row_num, + ) + @classmethod def from_flat(cls, row: dict[str, str], row_num: int) -> "AssessmentRow": """ @@ -376,27 +428,21 @@ def from_flat(cls, row: dict[str, str], row_num: int) -> "AssessmentRow": key: value for key, value in row.items() if value and key in cls.fields() } - # Create default value with correct length for answer responses if not specified + cls.check_missing_fields(row, row_num) + answers = deserialise_list(row.pop("answers", "")) answer_responses = deserialise_list(row.pop("answer_responses", "")) if not answer_responses: answer_responses = [""] * len(answers) - try: - return cls( - tags=deserialise_list(row.pop("tags", "")), - answers=answers, - scores=[float(i) for i in deserialise_list(row.pop("scores", ""))], - answer_semantic_ids=deserialise_list( - row.pop("answer_semantic_ids", "") - ), - answer_responses=answer_responses, - **row, - ) - except TypeError: - raise ImportAssessmentException( - "The import file is missing some required fields." - ) + return cls( + tags=deserialise_list(row.pop("tags", "")), + answers=answers, + scores=[float(i) for i in deserialise_list(row.pop("scores", ""))], + answer_semantic_ids=deserialise_list(row.pop("answer_semantic_ids", "")), + answer_responses=answer_responses, + **row, + ) def get_content_page_id_from_slug(slug: str, locale: Locale) -> int: diff --git a/home/tests/import-export-data/assessment_missing_related_page.csv b/home/tests/import-export-data/assessment_missing_related_page.csv index 549edc67..8f12ce79 100644 --- a/home/tests/import-export-data/assessment_missing_related_page.csv +++ b/home/tests/import-export-data/assessment_missing_related_page.csv @@ -1,4 +1,4 @@ -title,question_type,tags,slug,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,generic_error,questions,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id -Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,We did not quite get that,Select a number for the first question,Please retry,,,"A,B","4.0,1.0","test-1,test-2",question_semantic_id -Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,We did not quite get that,Select a number for the second question,We did not quite get that,,,"A,B,C","1.0,3.0,4.0","test-1,test-2,test-3",question_semantic_id -Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,We did not quite get that,Select a number for the third question,Sorry we did not quite get that,,,"A,B,C","2.0,1.0,2.0","test-1,test-2,test-3",question_semantic_id +title,question_type,tags,slug,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,generic_error,question,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id +Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,0,We did not quite get that,Select a number for the first question,Please retry,,,"A,B","4.0,1.0","test-1,test-2",question_semantic_id +Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,0,We did not quite get that,Select a number for the second question,We did not quite get that,,,"A,B,C","1.0,3.0,4.0","test-1,test-2,test-3",question_semantic_id +Fake Test,categorical_question,"Random, selection",fake-test,en,fake-page,4,fake-page,3,fake-page,0,We did not quite get that,Select a number for the third question,Sorry we did not quite get that,,,"A,B,C","2.0,1.0,2.0","test-1,test-2,test-3",question_semantic_id diff --git a/home/tests/import-export-data/assessments_missing_generic_error.csv b/home/tests/import-export-data/assessments_missing_generic_error.csv new file mode 100644 index 00000000..d033a080 --- /dev/null +++ b/home/tests/import-export-data/assessments_missing_generic_error.csv @@ -0,0 +1,6 @@ +title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,,Is this a draft assessment,,,,,,,,draf-assessment, +Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_missing_locale.csv b/home/tests/import-export-data/assessments_missing_locale.csv new file mode 100644 index 00000000..dd687497 --- /dev/null +++ b/home/tests/import-export-data/assessments_missing_locale.csv @@ -0,0 +1,6 @@ +title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_missing_title.csv b/home/tests/import-export-data/assessments_missing_title.csv new file mode 100644 index 00000000..2005aaea --- /dev/null +++ b/home/tests/import-export-data/assessments_missing_title.csv @@ -0,0 +1,6 @@ +title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_multiple_missing_headers.csv b/home/tests/import-export-data/assessments_multiple_missing_headers.csv new file mode 100644 index 00000000..fc96ded0 --- /dev/null +++ b/home/tests/import-export-data/assessments_multiple_missing_headers.csv @@ -0,0 +1,6 @@ +,question_type,tags,,version,,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +Test min max range,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/broken_assessment.csv b/home/tests/import-export-data/broken_assessment.csv index 52f3da9c..d25aa76a 100644 --- a/home/tests/import-export-data/broken_assessment.csv +++ b/home/tests/import-export-data/broken_assessment.csv @@ -1,2 +1,2 @@ this,is,not,a,valid,content,csv -"For real, it's totally not." +"For real, it's totally not.",,,,,, diff --git a/home/tests/import-export-data/snake_case_assessments.csv b/home/tests/import-export-data/snake_case_assessments.csv new file mode 100644 index 00000000..26c67a61 --- /dev/null +++ b/home/tests/import-export-data/snake_case_assessments.csv @@ -0,0 +1,5 @@ +title,question type,tags,slug,version,locale,High result page,High inflection,Medium result page,Medium inflection,Low result page,Skip threshold,Skip high result page,Generic error,question,explainer,error,min,max,answers,scores,Answer semantic ids,Question semantic id,Answer responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Test min max range,integer_question,test-min-max-range,test-min-max-range,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 59956512..10cb6594 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -425,6 +425,41 @@ def test_single_assessment(self, impexp: ImportExport) -> None: imported = impexp.get_assessment_json() assert imported == orig + def test_snake_case_assessments(self, csv_impexp: ImportExport) -> None: + """ + Importing a csv with spaces in header names and uppercase text should be coverted to snake case + and pass, provided that the converted text are valid cms headers. + Here we check that the headers are changed to snake case, the assessment is saved + and finally we check that the saved assessment has the correct headers. + + (This uses snake_case_assessments.csv.) + """ + csv_impexp.import_file("snake_case_assessments.csv") + imported_assessments = Assessment.objects.all() + assert imported_assessments.exists() + assessment_model = Assessment + model_fields = [field.name for field in assessment_model._meta.get_fields()] + + expected_fields = { + "title": True, + "slug": True, + "high_inflection": True, + "medium_inflection": True, + "high_result_page": True, + "medium_result_page": True, + "low_result_page": True, + "skip_high_result_page": True, + "skip_threshold": True, + "generic_error": True, + "questions": True, + } + + for field_name, should_exist in expected_fields.items(): + if should_exist: + assert ( + field_name in model_fields + ), f"Field '{field_name}' not found in Assessment model." + @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db() @@ -462,7 +497,10 @@ def test_import_error(self, csv_impexp: ImportExport) -> None: csv_bytes = csv_impexp.import_file("assessment_simple.csv") with pytest.raises(ImportAssessmentException) as e: csv_impexp.import_file("broken_assessment.csv") - assert e.value.message == "The import file is missing some required fields." + assert ( + e.value.message + == "Missing mandatory headers: title, question, slug, generic_error, locale" + ) content = csv_impexp.export_assessment() src, dst = csv_impexp.csvs2dicts(csv_bytes, content) assert dst == src @@ -618,6 +656,52 @@ def test_invalid_medium_score(self, csv_impexp: ImportExport) -> None: ) assert e.value.row_num == 2 + def test_multiple_missing_headers(self, csv_impexp: ImportExport) -> None: + """ + Importing a CSV with multiple missing headers should return an error + stating all the mandatory headers that are missing + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_multiple_missing_headers.csv") + assert ( + e.value.message == "Missing mandatory headers: title, slug, " + "generic_error, locale" + ) + assert e.value.row_num == 1 + + def test_missing_title(self, csv_impexp: ImportExport) -> None: + """ + Importing a CSV with a missing title field should return an error + that a title is missing + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_missing_title.csv") + assert e.value.message == "The import file is missing required fields: title" + assert e.value.row_num == 4 + + def test_missing_locale(self, csv_impexp: ImportExport) -> None: + """ + Importing a CSV with a missing locale field should return an error + that a locale is mmissing + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_missing_locale.csv") + assert e.value.message == "The import file is missing required fields: locale" + assert e.value.row_num == 5 + + def test_missing_generic_error(self, csv_impexp: ImportExport) -> None: + """ + Importing a CSV with a missing generic error field should return an error + that a generic error is mmissing + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_missing_generic_error.csv") + assert ( + e.value.message + == "The import file is missing required fields: generic_error" + ) + assert e.value.row_num == 2 + @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db() From c3270af0380f5c8fcf7050f5c65b109eac279141 Mon Sep 17 00:00:00 2001 From: devchima Date: Mon, 17 Feb 2025 06:01:18 -0500 Subject: [PATCH 02/11] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7588d12c..1e8dddb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Consistent labelling on import forms - Fix class_name warning: RemovedInWagtail60Warning - Helper texts for some ordered content sets +- CMS Forms Flexible imports and Better Error Handling ### Removed - Locale field on exports --> From 10bd8b290b28a5c18034bd8bc7222565f07da5a3 Mon Sep 17 00:00:00 2001 From: devchima Date: Mon, 17 Feb 2025 07:59:57 -0500 Subject: [PATCH 03/11] Handle case where import files are empty --- home/import_assessments.py | 26 ++++++----- home/import_helpers.py | 6 ++- home/tests/import-export-data/empty.csv | 1 + home/tests/test_assessment_import_export.py | 49 ++++++++++++--------- 4 files changed, 50 insertions(+), 32 deletions(-) create mode 100644 home/tests/import-export-data/empty.csv diff --git a/home/import_assessments.py b/home/import_assessments.py index 3cb2b61c..4c986e45 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -106,20 +106,24 @@ def parse_file(self) -> list["AssessmentRow"]: c. Validates that the snake_case headers contain all mandatory headers. d. Transforms each row to use snake_case headers. """ + row_iterator = parse_file(self.file_content, self.file_type) rows = [row for _, row in row_iterator] - if rows: - original_headers = rows[0].keys() - headers_mapping = { - header: self.to_snake_case(header) for header in original_headers - } - snake_case_headers = list(headers_mapping.values()) - self.validate_headers(snake_case_headers, MANDATORY_HEADERS, row_num=1) - transformed_rows = [ - {headers_mapping[key]: value for key, value in row.items()} - for row in rows - ] + if not rows: + raise ImportAssessmentException( + "The import file is empty or contains no valid rows.", row_num=1 + ) + + original_headers = rows[0].keys() + headers_mapping = { + header: self.to_snake_case(header) for header in original_headers + } + snake_case_headers = list(headers_mapping.values()) + self.validate_headers(snake_case_headers, MANDATORY_HEADERS, row_num=1) + transformed_rows = [ + {headers_mapping[key]: value for key, value in row.items()} for row in rows + ] return [ AssessmentRow.from_flat(row, i + 2) diff --git a/home/import_helpers.py b/home/import_helpers.py index 75e00aba..f499baf9 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -169,7 +169,11 @@ def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | N """ Fix keys for all rows by lowercasing keys and removing whitespace from keys and values """ - first_row = next(rows) + try: + first_row = next(rows) + except StopIteration: + return iter([]) + if len(first_row) != len(fix_row(first_row)): raise ImportException( "Invalid format. Please check that there are no duplicate headers." diff --git a/home/tests/import-export-data/empty.csv b/home/tests/import-export-data/empty.csv new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/home/tests/import-export-data/empty.csv @@ -0,0 +1 @@ + diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 10cb6594..ddc441d5 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -427,7 +427,7 @@ def test_single_assessment(self, impexp: ImportExport) -> None: def test_snake_case_assessments(self, csv_impexp: ImportExport) -> None: """ - Importing a csv with spaces in header names and uppercase text should be coverted to snake case + Importing a csv with spaces in header names and uppercase text should be converted to snake case and pass, provided that the converted text are valid cms headers. Here we check that the headers are changed to snake case, the assessment is saved and finally we check that the saved assessment has the correct headers. @@ -440,25 +440,24 @@ def test_snake_case_assessments(self, csv_impexp: ImportExport) -> None: assessment_model = Assessment model_fields = [field.name for field in assessment_model._meta.get_fields()] - expected_fields = { - "title": True, - "slug": True, - "high_inflection": True, - "medium_inflection": True, - "high_result_page": True, - "medium_result_page": True, - "low_result_page": True, - "skip_high_result_page": True, - "skip_threshold": True, - "generic_error": True, - "questions": True, - } - - for field_name, should_exist in expected_fields.items(): - if should_exist: - assert ( - field_name in model_fields - ), f"Field '{field_name}' not found in Assessment model." + expected_fields = [ + "title", + "slug", + "high_inflection", + "medium_inflection", + "high_result_page", + "medium_result_page", + "low_result_page", + "skip_high_result_page", + "skip_threshold", + "generic_error", + "questions", + ] + + for field_name in expected_fields: + assert ( + field_name in model_fields + ), f"Field '{field_name}' not found in Assessment model." @pytest.mark.usefixtures("result_content_pages") @@ -702,6 +701,16 @@ def test_missing_generic_error(self, csv_impexp: ImportExport) -> None: ) assert e.value.row_num == 2 + def test_empty_rows(self, csv_impexp: ImportExport) -> None: + """ + Importing an empty CSV should return an error that the + import file has no rows. + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("empty.csv") + assert e.value.message == "The import file is empty or contains no valid rows." + assert e.value.row_num == 1 + @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db() From c103c9a72e508618402f62b4d6ed5a29e1afaf91 Mon Sep 17 00:00:00 2001 From: devchima Date: Mon, 17 Feb 2025 14:48:23 -0500 Subject: [PATCH 04/11] Move import functions into helper file so they are re-usable by other apps for file imports. - Check empty rows() now actioned for assessment, content pages imports and any other cms imports that uses the parse_file function. - We are not able to move snake_case into fix_rows() in import_helpers.py because contentset uses PascalCase for headers. If we snake_cased every import, there will be key errors in contentsets. --- home/import_assessments.py | 27 ++++++++---------- home/import_helpers.py | 31 ++++++++++++++++++++- home/tests/test_assessment_import_export.py | 6 ++-- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/home/import_assessments.py b/home/import_assessments.py index 4c986e45..aa97ad6c 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -12,7 +12,14 @@ from wagtail.coreutils import get_content_languages # type: ignore from wagtail.models import Locale, Page # type: ignore -from home.import_helpers import ImportException, parse_file, validate_using_form +from home.import_helpers import ( + ImportException, + convert_headers_to_snake_case, + validate_using_form, +) +from home.import_helpers import ( + parse_file as helper_parse_file, +) from home.models import Assessment, ContentPage, HomePage # type: ignore AssessmentId = tuple[str, Locale] @@ -106,21 +113,13 @@ def parse_file(self) -> list["AssessmentRow"]: c. Validates that the snake_case headers contain all mandatory headers. d. Transforms each row to use snake_case headers. """ - - row_iterator = parse_file(self.file_content, self.file_type) + row_iterator = helper_parse_file(self.file_content, self.file_type) rows = [row for _, row in row_iterator] - if not rows: - raise ImportAssessmentException( - "The import file is empty or contains no valid rows.", row_num=1 - ) - original_headers = rows[0].keys() - headers_mapping = { - header: self.to_snake_case(header) for header in original_headers - } + headers_mapping = convert_headers_to_snake_case(list(original_headers)) snake_case_headers = list(headers_mapping.values()) - self.validate_headers(snake_case_headers, MANDATORY_HEADERS, row_num=1) + self.validate_headers(snake_case_headers, row_num=1) transformed_rows = [ {headers_mapping[key]: value for key, value in row.items()} for row in rows ] @@ -214,9 +213,7 @@ def create_shadow_assessment_from_row( ) assessment.questions.append(question) - def validate_headers( - self, headers: list[str], MANDATORY_HEADERS: list[str], row_num: int - ) -> None: + def validate_headers(self, headers: list[str], row_num: int) -> None: missing_headers = [ header for header in MANDATORY_HEADERS if header not in headers ] diff --git a/home/import_helpers.py b/home/import_helpers.py index f499baf9..f90b3ce7 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -1,5 +1,6 @@ # The error messages are processed and parsed into a list of messages we return to the user import csv +import re from collections.abc import Iterator from datetime import datetime from io import BytesIO, StringIO @@ -165,6 +166,30 @@ def extract_errors(data: dict[str | int, Any] | list[str]) -> dict[str, str]: return error_message +def check_empty_rows(rows: list[dict[str, Any]], row_num: int) -> None: + """ + Checks if the list of rows is empty and raises an exception if true. + """ + if not rows: + raise ImportException( + "The import file is empty or contains no valid rows.", row_num=row_num + ) + + +def convert_headers_to_snake_case(headers: list[str]) -> dict[str, str]: + """ + Converts a list of headers to snake_case and returns a mapping. + """ + return {header: to_snake_case(header) for header in headers} + + +def to_snake_case(s: str) -> str: + """ + Converts string to snake_case. + """ + return re.sub(r"[\W_]+", "_", s).lower().strip("_") + + def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]: """ Fix keys for all rows by lowercasing keys and removing whitespace from keys and values @@ -210,7 +235,11 @@ def parse_file( file_content: bytes, file_type: str ) -> Iterator[tuple[int, dict[str, Any]]]: read_rows = read_xlsx if file_type == "XLSX" else read_csv - return enumerate(fix_rows(read_rows(file_content)), start=2) + rows = list(fix_rows(read_rows(file_content))) + + check_empty_rows(rows, row_num=1) + + return enumerate(rows, start=2) def read_csv(file_content: bytes) -> Iterator[dict[str, Any]]: diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index ddc441d5..4c7568e4 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -706,9 +706,11 @@ def test_empty_rows(self, csv_impexp: ImportExport) -> None: Importing an empty CSV should return an error that the import file has no rows. """ - with pytest.raises(ImportAssessmentException) as e: + with pytest.raises(ImportException) as e: csv_impexp.import_file("empty.csv") - assert e.value.message == "The import file is empty or contains no valid rows." + assert e.value.message == [ + "The import file is empty or contains no valid rows." + ] assert e.value.row_num == 1 From 3206672bb86a66bafc241e0b4d3fab4d83f3ff14 Mon Sep 17 00:00:00 2001 From: devchima Date: Tue, 18 Feb 2025 01:49:24 -0500 Subject: [PATCH 05/11] Remove redundant function that was moved to import_helpers file --- home/import_assessments.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/home/import_assessments.py b/home/import_assessments.py index aa97ad6c..87040923 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -1,6 +1,5 @@ import contextlib import csv -import re from dataclasses import dataclass, field, fields from queue import Queue from typing import Any @@ -223,9 +222,6 @@ def validate_headers(self, headers: list[str], row_num: int) -> None: row_num=row_num, ) - def to_snake_case(self, s: str) -> str: - return re.sub(r"[\W_]+", "_", s).lower().strip("_") - # Since wagtail page models are so difficult to create and modify programatically, # we instead store all the pages as these shadow objects, which we can then at the end From 3b890da80cb297168b9766256184407440af6a86 Mon Sep 17 00:00:00 2001 From: devchima Date: Tue, 18 Feb 2025 05:54:55 -0500 Subject: [PATCH 06/11] specific list of characters we convert and remove unneeded tests --- home/import_assessments.py | 13 +--- home/import_helpers.py | 63 ++++++++++++++++--- .../assessments_missing_generic_error.csv | 6 -- .../assessments_missing_locale.csv | 6 -- .../import-export-data/broken_assessment.csv | 2 +- home/tests/test_assessment_import_export.py | 23 ------- 6 files changed, 59 insertions(+), 54 deletions(-) delete mode 100644 home/tests/import-export-data/assessments_missing_generic_error.csv delete mode 100644 home/tests/import-export-data/assessments_missing_locale.csv diff --git a/home/import_assessments.py b/home/import_assessments.py index 87040923..50779595 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -13,7 +13,6 @@ from home.import_helpers import ( ImportException, - convert_headers_to_snake_case, validate_using_form, ) from home.import_helpers import ( @@ -116,17 +115,9 @@ def parse_file(self) -> list["AssessmentRow"]: rows = [row for _, row in row_iterator] original_headers = rows[0].keys() - headers_mapping = convert_headers_to_snake_case(list(original_headers)) - snake_case_headers = list(headers_mapping.values()) - self.validate_headers(snake_case_headers, row_num=1) - transformed_rows = [ - {headers_mapping[key]: value for key, value in row.items()} for row in rows - ] + self.validate_headers(list(original_headers), row_num=1) - return [ - AssessmentRow.from_flat(row, i + 2) - for i, row in enumerate(transformed_rows) - ] + return [AssessmentRow.from_flat(row, i + 2) for i, row in enumerate(rows)] def set_progress(self, message: str, progress: int) -> None: self.progress_queue.put_nowait(progress) diff --git a/home/import_helpers.py b/home/import_helpers.py index f90b3ce7..c45355a2 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -25,6 +25,45 @@ from .xlsx_helpers import get_active_sheet +TYPO_KEYWORDS = [ + "question type", + "question-type", + "high result page", + "high-result-page", + "high inflection", + "high-inflection", + "medium result page", + "medium-result-page", + "medium inflection", + "medium-inflection", + "low result page", + "low-result-page", + "skip threshold", + "skip-threshold", + "skip high result page", + "skip-high-result-page", + "generic error", + "generic_error", + "answer semantic ids", + "answer-semantic-id", + "question semantic id", + "question-semantic-id", + "answer responses", + "answer-responses", +] +""" +List of keywords known to be common user typos or formatting inconsistencies. + +These keywords are identified as common variations or errors in user input that +should be corrected by converting them to snake_case format. The list contains +different representations of header titles from CMS-Forms for conversion to snake_case. + +Any additional keywords from Content Pages and other import applications that need +similar corrections should be appended to this list to maintain uniformity in data processing. +Contentset uses Pascal casing so changes to the application may be needed first before including +those variations in the list. +""" + class ImportException(Exception): """ @@ -192,29 +231,39 @@ def to_snake_case(s: str) -> str: def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]: """ - Fix keys for all rows by lowercasing keys and removing whitespace from keys and values + Fix keys for all rows by lowercasing keys, optionally converting to snake_case + if header text matches typo_keywords, and removing whitespace from keys and values. """ + try: first_row = next(rows) except StopIteration: return iter([]) - if len(first_row) != len(fix_row(first_row)): + if len(first_row) != len(fix_row(first_row, TYPO_KEYWORDS)): raise ImportException( "Invalid format. Please check that there are no duplicate headers." ) - yield fix_row(first_row) + yield fix_row(first_row, TYPO_KEYWORDS) for row in rows: - yield fix_row(row) + yield fix_row(row, TYPO_KEYWORDS) -def fix_row(row: dict[str, str | None]) -> dict[str, str | None]: +def fix_row(row: dict[str, str | None], keywords: list[str]) -> dict[str, str | None]: """ - Fix a single row by lowercasing the key and removing whitespace from the key and value + Fix a single row by lowercasing the key, converting it to snake_case + if it matches a typo_keyword, and removing whitespace from the key and value. """ try: - return {_normalise_key(k): _normalise_value(v) for k, v in row.items()} + return { + ( + to_snake_case(_normalise_key(k)) + if _normalise_key(k) in keywords + else _normalise_key(k) + ): _normalise_value(v) + for k, v in row.items() + } except AttributeError: raise ImportException( "Invalid format. Please check that all row values have headers." diff --git a/home/tests/import-export-data/assessments_missing_generic_error.csv b/home/tests/import-export-data/assessments_missing_generic_error.csv deleted file mode 100644 index d033a080..00000000 --- a/home/tests/import-export-data/assessments_missing_generic_error.csv +++ /dev/null @@ -1,6 +0,0 @@ -title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses -Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,,Is this a draft assessment,,,,,,,,draf-assessment, -Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, -Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, -Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, -Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_missing_locale.csv b/home/tests/import-export-data/assessments_missing_locale.csv deleted file mode 100644 index dd687497..00000000 --- a/home/tests/import-export-data/assessments_missing_locale.csv +++ /dev/null @@ -1,6 +0,0 @@ -title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses -Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, -Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, -Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, -Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, -Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/broken_assessment.csv b/home/tests/import-export-data/broken_assessment.csv index d25aa76a..52f3da9c 100644 --- a/home/tests/import-export-data/broken_assessment.csv +++ b/home/tests/import-export-data/broken_assessment.csv @@ -1,2 +1,2 @@ this,is,not,a,valid,content,csv -"For real, it's totally not.",,,,,, +"For real, it's totally not." diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 4c7568e4..0645076b 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -678,29 +678,6 @@ def test_missing_title(self, csv_impexp: ImportExport) -> None: assert e.value.message == "The import file is missing required fields: title" assert e.value.row_num == 4 - def test_missing_locale(self, csv_impexp: ImportExport) -> None: - """ - Importing a CSV with a missing locale field should return an error - that a locale is mmissing - """ - with pytest.raises(ImportAssessmentException) as e: - csv_impexp.import_file("assessments_missing_locale.csv") - assert e.value.message == "The import file is missing required fields: locale" - assert e.value.row_num == 5 - - def test_missing_generic_error(self, csv_impexp: ImportExport) -> None: - """ - Importing a CSV with a missing generic error field should return an error - that a generic error is mmissing - """ - with pytest.raises(ImportAssessmentException) as e: - csv_impexp.import_file("assessments_missing_generic_error.csv") - assert ( - e.value.message - == "The import file is missing required fields: generic_error" - ) - assert e.value.row_num == 2 - def test_empty_rows(self, csv_impexp: ImportExport) -> None: """ Importing an empty CSV should return an error that the From cc507cd7b6500f0f86da64046f1de7e3782cfbf5 Mon Sep 17 00:00:00 2001 From: devchima Date: Tue, 18 Feb 2025 07:51:05 -0500 Subject: [PATCH 07/11] Add check for invalid characters in headers, an exception and a test to handle it --- home/import_assessments.py | 15 ++- home/import_helpers.py | 103 ++++++++---------- .../import-export-data/invalid_character.csv | 5 + home/tests/test_assessment_import_export.py | 13 +++ 4 files changed, 74 insertions(+), 62 deletions(-) create mode 100644 home/tests/import-export-data/invalid_character.csv diff --git a/home/import_assessments.py b/home/import_assessments.py index 50779595..f5d5cbe0 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -13,6 +13,7 @@ from home.import_helpers import ( ImportException, + convert_headers_to_snake_case, validate_using_form, ) from home.import_helpers import ( @@ -115,9 +116,19 @@ def parse_file(self) -> list["AssessmentRow"]: rows = [row for _, row in row_iterator] original_headers = rows[0].keys() - self.validate_headers(list(original_headers), row_num=1) + headers_mapping = convert_headers_to_snake_case( + list(original_headers), row_num=1 + ) + snake_case_headers = list(headers_mapping.values()) + self.validate_headers(snake_case_headers, row_num=1) + transformed_rows = [ + {headers_mapping[key]: value for key, value in row.items()} for row in rows + ] - return [AssessmentRow.from_flat(row, i + 2) for i, row in enumerate(rows)] + return [ + AssessmentRow.from_flat(row, i + 2) + for i, row in enumerate(transformed_rows) + ] def set_progress(self, message: str, progress: int) -> None: self.progress_queue.put_nowait(progress) diff --git a/home/import_helpers.py b/home/import_helpers.py index c45355a2..8467a814 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -25,44 +25,32 @@ from .xlsx_helpers import get_active_sheet -TYPO_KEYWORDS = [ - "question type", - "question-type", - "high result page", - "high-result-page", - "high inflection", - "high-inflection", - "medium result page", - "medium-result-page", - "medium inflection", - "medium-inflection", - "low result page", - "low-result-page", - "skip threshold", - "skip-threshold", - "skip high result page", - "skip-high-result-page", - "generic error", - "generic_error", - "answer semantic ids", - "answer-semantic-id", - "question semantic id", - "question-semantic-id", - "answer responses", - "answer-responses", -] -""" -List of keywords known to be common user typos or formatting inconsistencies. - -These keywords are identified as common variations or errors in user input that -should be corrected by converting them to snake_case format. The list contains -different representations of header titles from CMS-Forms for conversion to snake_case. - -Any additional keywords from Content Pages and other import applications that need -similar corrections should be appended to this list to maintain uniformity in data processing. -Contentset uses Pascal casing so changes to the application may be needed first before including -those variations in the list. -""" +INVALID_CHARACTERS = { + ":", + ";", + "!", + "@", + "#", + "$", + "%", + "^", + "&", + "*", + "(", + ")", + "+", + "=", + "{", + "}", + "[", + "]", + "|", + "\\", + "/", + "?", + ">", + "<", +} class ImportException(Exception): @@ -215,55 +203,50 @@ def check_empty_rows(rows: list[dict[str, Any]], row_num: int) -> None: ) -def convert_headers_to_snake_case(headers: list[str]) -> dict[str, str]: +def convert_headers_to_snake_case(headers: list[str], row_num: int) -> dict[str, str]: """ Converts a list of headers to snake_case and returns a mapping. """ - return {header: to_snake_case(header) for header in headers} + return {header: to_snake_case(header, row_num) for header in headers} -def to_snake_case(s: str) -> str: +def to_snake_case(s: str, row_num: int) -> str: """ - Converts string to snake_case. + Converts a given string to snake_case if it contains spaces or hyphens. + Throws an exception for invalid headers containing special characters. """ - return re.sub(r"[\W_]+", "_", s).lower().strip("_") + if any(char in s for char in INVALID_CHARACTERS): + raise ImportException( + f"Invalid header: '{s}' contains invalid characters.", row_num=row_num + ) + return re.sub(r"[\W_]+", "_", s).strip("_") def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]: """ - Fix keys for all rows by lowercasing keys, optionally converting to snake_case - if header text matches typo_keywords, and removing whitespace from keys and values. + Fix keys for all rows by lowercasing keys and removing whitespace from keys and values """ - try: first_row = next(rows) except StopIteration: return iter([]) - if len(first_row) != len(fix_row(first_row, TYPO_KEYWORDS)): + if len(first_row) != len(fix_row(first_row)): raise ImportException( "Invalid format. Please check that there are no duplicate headers." ) - yield fix_row(first_row, TYPO_KEYWORDS) + yield fix_row(first_row) for row in rows: - yield fix_row(row, TYPO_KEYWORDS) + yield fix_row(row) -def fix_row(row: dict[str, str | None], keywords: list[str]) -> dict[str, str | None]: +def fix_row(row: dict[str, str | None]) -> dict[str, str | None]: """ - Fix a single row by lowercasing the key, converting it to snake_case - if it matches a typo_keyword, and removing whitespace from the key and value. + Fix a single row by lowercasing the key and removing whitespace from the key and value """ try: - return { - ( - to_snake_case(_normalise_key(k)) - if _normalise_key(k) in keywords - else _normalise_key(k) - ): _normalise_value(v) - for k, v in row.items() - } + return {_normalise_key(k): _normalise_value(v) for k, v in row.items()} except AttributeError: raise ImportException( "Invalid format. Please check that all row values have headers." diff --git a/home/tests/import-export-data/invalid_character.csv b/home/tests/import-export-data/invalid_character.csv new file mode 100644 index 00000000..88aac940 --- /dev/null +++ b/home/tests/import-export-data/invalid_character.csv @@ -0,0 +1,5 @@ +title,question type:,tags,Slug,version,locale,High result page,High inflection,Medium result page,Medium inflection,Low result page,Skip threshold,Skip high result page,generic_error,question,explainer,error,min,max,answers,scores,Answer semantic ids,Question semantic id,Answer responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Test min max range,integer_question,test-min-max-range,test-min-max-range,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 0645076b..08214fc4 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -690,6 +690,19 @@ def test_empty_rows(self, csv_impexp: ImportExport) -> None: ] assert e.value.row_num == 1 + def test_invalid_character(self, csv_impexp: ImportExport) -> None: + """ + Importing an empty CSV with an invalid character in the header + should return an error with the line number and header + that was wrongly entered. + """ + with pytest.raises(ImportException) as e: + csv_impexp.import_file("invalid_character.csv") + assert e.value.message == [ + "Invalid header: 'question type:' contains invalid characters." + ] + assert e.value.row_num == 1 + @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db() From 27d1c374ca35b8506a170039a6f578f470624c83 Mon Sep 17 00:00:00 2001 From: devchima Date: Wed, 19 Feb 2025 02:45:29 -0500 Subject: [PATCH 08/11] We do not want to catch all invalid characters. Remove invalid character exception --- home/import_assessments.py | 1 + home/import_helpers.py | 35 +------------------ .../import-export-data/invalid_character.csv | 5 --- home/tests/test_assessment_import_export.py | 13 ------- 4 files changed, 2 insertions(+), 52 deletions(-) delete mode 100644 home/tests/import-export-data/invalid_character.csv diff --git a/home/import_assessments.py b/home/import_assessments.py index f5d5cbe0..b3ba21fc 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -119,6 +119,7 @@ def parse_file(self) -> list["AssessmentRow"]: headers_mapping = convert_headers_to_snake_case( list(original_headers), row_num=1 ) + snake_case_headers = list(headers_mapping.values()) self.validate_headers(snake_case_headers, row_num=1) transformed_rows = [ diff --git a/home/import_helpers.py b/home/import_helpers.py index 8467a814..83186107 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -1,6 +1,5 @@ # The error messages are processed and parsed into a list of messages we return to the user import csv -import re from collections.abc import Iterator from datetime import datetime from io import BytesIO, StringIO @@ -25,33 +24,6 @@ from .xlsx_helpers import get_active_sheet -INVALID_CHARACTERS = { - ":", - ";", - "!", - "@", - "#", - "$", - "%", - "^", - "&", - "*", - "(", - ")", - "+", - "=", - "{", - "}", - "[", - "]", - "|", - "\\", - "/", - "?", - ">", - "<", -} - class ImportException(Exception): """ @@ -213,13 +185,8 @@ def convert_headers_to_snake_case(headers: list[str], row_num: int) -> dict[str, def to_snake_case(s: str, row_num: int) -> str: """ Converts a given string to snake_case if it contains spaces or hyphens. - Throws an exception for invalid headers containing special characters. """ - if any(char in s for char in INVALID_CHARACTERS): - raise ImportException( - f"Invalid header: '{s}' contains invalid characters.", row_num=row_num - ) - return re.sub(r"[\W_]+", "_", s).strip("_") + return s.replace(" ", "_").replace("-", "_").strip("_") def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]: diff --git a/home/tests/import-export-data/invalid_character.csv b/home/tests/import-export-data/invalid_character.csv deleted file mode 100644 index 88aac940..00000000 --- a/home/tests/import-export-data/invalid_character.csv +++ /dev/null @@ -1,5 +0,0 @@ -title,question type:,tags,Slug,version,locale,High result page,High inflection,Medium result page,Medium inflection,Low result page,Skip threshold,Skip high result page,generic_error,question,explainer,error,min,max,answers,scores,Answer semantic ids,Question semantic id,Answer responses -Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, -Test min max range,integer_question,test-min-max-range,test-min-max-range,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, -Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, -Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,high-inflection,5,medium-score,3,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 08214fc4..0645076b 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -690,19 +690,6 @@ def test_empty_rows(self, csv_impexp: ImportExport) -> None: ] assert e.value.row_num == 1 - def test_invalid_character(self, csv_impexp: ImportExport) -> None: - """ - Importing an empty CSV with an invalid character in the header - should return an error with the line number and header - that was wrongly entered. - """ - with pytest.raises(ImportException) as e: - csv_impexp.import_file("invalid_character.csv") - assert e.value.message == [ - "Invalid header: 'question type:' contains invalid characters." - ] - assert e.value.row_num == 1 - @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db() From 3329359a84a478503bf3cc9dbb1aaf77f4d4b0ed Mon Sep 17 00:00:00 2001 From: devchima Date: Wed, 19 Feb 2025 06:32:14 -0500 Subject: [PATCH 09/11] Update dataclass s mandatory fields have no default value --- home/import_assessments.py | 51 +++++++++++++++++++------------------- home/import_helpers.py | 3 +++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/home/import_assessments.py b/home/import_assessments.py index b3ba21fc..66b2026c 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -372,11 +372,13 @@ class AssessmentRow: """ slug: str - title: str = "" + title: str + question: str + generic_error: str + locale: str version: str = "" tags: list[str] = field(default_factory=list) question_type: str = "" - locale: str = "" high_result_page: str = "" high_inflection: str = "" medium_result_page: str = "" @@ -384,8 +386,6 @@ class AssessmentRow: low_result_page: str = "" skip_threshold: str = "" skip_high_result_page: str = "" - generic_error: str = "" - question: str = "" explainer: str = "" error: str = "" min: str = "" @@ -400,18 +400,6 @@ class AssessmentRow: def fields(cls) -> list[str]: return [field.name for field in fields(cls)] - @classmethod - def check_missing_fields(cls, row: dict[str, str], row_num: int) -> None: - """ - Checks for missing required fields in the row and raises an exception if any is missing. - """ - missing_fields = [field for field in MANDATORY_HEADERS if field not in row] - if missing_fields: - raise ImportAssessmentException( - f"The import file is missing required fields: {', '.join(missing_fields)}", - row_num, - ) - @classmethod def from_flat(cls, row: dict[str, str], row_num: int) -> "AssessmentRow": """ @@ -428,21 +416,32 @@ def from_flat(cls, row: dict[str, str], row_num: int) -> "AssessmentRow": key: value for key, value in row.items() if value and key in cls.fields() } - cls.check_missing_fields(row, row_num) - answers = deserialise_list(row.pop("answers", "")) answer_responses = deserialise_list(row.pop("answer_responses", "")) if not answer_responses: answer_responses = [""] * len(answers) - return cls( - tags=deserialise_list(row.pop("tags", "")), - answers=answers, - scores=[float(i) for i in deserialise_list(row.pop("scores", ""))], - answer_semantic_ids=deserialise_list(row.pop("answer_semantic_ids", "")), - answer_responses=answer_responses, - **row, - ) + try: + return cls( + tags=deserialise_list(row.pop("tags", "")), + answers=answers, + scores=[float(i) for i in deserialise_list(row.pop("scores", ""))], + answer_semantic_ids=deserialise_list( + row.pop("answer_semantic_ids", "") + ), + answer_responses=answer_responses, + **row, + ) + except TypeError: + missing_fields = [ + field + for field in MANDATORY_HEADERS + if field not in row or row[field] == "" + ] + raise ImportAssessmentException( + f"The import file is missing required fields: {', '.join(missing_fields)}", + row_num, + ) def get_content_page_id_from_slug(slug: str, locale: Locale) -> int: diff --git a/home/import_helpers.py b/home/import_helpers.py index 83186107..a7203a02 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -175,6 +175,9 @@ def check_empty_rows(rows: list[dict[str, Any]], row_num: int) -> None: ) +# TODO: +# Move to shared code once we're able to work on the contentpage import. +# Contentsets uses pascal case headers def convert_headers_to_snake_case(headers: list[str], row_num: int) -> dict[str, str]: """ Converts a list of headers to snake_case and returns a mapping. From ce3cbc9581473ac12676059bf2577964e0f47a41 Mon Sep 17 00:00:00 2001 From: devchima Date: Wed, 19 Feb 2025 06:49:06 -0500 Subject: [PATCH 10/11] Update exception error message --- home/import_assessments.py | 2 +- home/tests/test_assessment_import_export.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/home/import_assessments.py b/home/import_assessments.py index 66b2026c..0b23c9b3 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -439,7 +439,7 @@ def from_flat(cls, row: dict[str, str], row_num: int) -> "AssessmentRow": if field not in row or row[field] == "" ] raise ImportAssessmentException( - f"The import file is missing required fields: {', '.join(missing_fields)}", + f"Row missing values for required fields: {', '.join(missing_fields)}", row_num, ) diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 0645076b..8ae9f523 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -675,7 +675,7 @@ def test_missing_title(self, csv_impexp: ImportExport) -> None: """ with pytest.raises(ImportAssessmentException) as e: csv_impexp.import_file("assessments_missing_title.csv") - assert e.value.message == "The import file is missing required fields: title" + assert e.value.message == "Row missing values for required fields: title" assert e.value.row_num == 4 def test_empty_rows(self, csv_impexp: ImportExport) -> None: From 5e95476f43281ee66f12d91bd09f9a7a551d1da2 Mon Sep 17 00:00:00 2001 From: devchima Date: Wed, 19 Feb 2025 08:00:50 -0500 Subject: [PATCH 11/11] Add more tests for headers and missing values --- .../assessments_missing_row_values.csv | 6 +++++ ...essments_single_header_multiple_fields.csv | 6 +++++ home/tests/test_assessment_import_export.py | 25 +++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 home/tests/import-export-data/assessments_missing_row_values.csv create mode 100644 home/tests/import-export-data/assessments_single_header_multiple_fields.csv diff --git a/home/tests/import-export-data/assessments_missing_row_values.csv b/home/tests/import-export-data/assessments_missing_row_values.csv new file mode 100644 index 00000000..015afc4a --- /dev/null +++ b/home/tests/import-export-data/assessments_missing_row_values.csv @@ -0,0 +1,6 @@ +title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +,integer_question,test-min-max-range,,v1.0,,,5,,3,,0,,,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_single_header_multiple_fields.csv b/home/tests/import-export-data/assessments_single_header_multiple_fields.csv new file mode 100644 index 00000000..9846e2d2 --- /dev/null +++ b/home/tests/import-export-data/assessments_single_header_multiple_fields.csv @@ -0,0 +1,6 @@ +,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses +Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, +,freetext_question,,,v1.0,,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, +Test min max range,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, +Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, +Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 8ae9f523..3f6bb410 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -690,6 +690,31 @@ def test_empty_rows(self, csv_impexp: ImportExport) -> None: ] assert e.value.row_num == 1 + def test_multiple_missing_values(self, csv_impexp: ImportExport) -> None: + """ + Importing an empty CSV should return an error with all the missing + values in a row + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_missing_row_values.csv") + assert ( + e.value.message + == "Row missing values for required fields: title, slug, generic_error, locale" + ) + assert e.value.row_num == 4 + + def test_single_missing_header_multiple_missing_rows( + self, csv_impexp: ImportExport + ) -> None: + """ + Importing a CSV with a single missing header and multiple missing rows + should return an error that a header is missing + """ + with pytest.raises(ImportAssessmentException) as e: + csv_impexp.import_file("assessments_single_header_multiple_fields.csv") + assert e.value.message == "Missing mandatory headers: title" + assert e.value.row_num == 1 + @pytest.mark.usefixtures("result_content_pages") @pytest.mark.django_db()