diff --git a/home/import_assessments.py b/home/import_assessments.py index 87040923..50779595 100644 --- a/home/import_assessments.py +++ b/home/import_assessments.py @@ -13,7 +13,6 @@ from home.import_helpers import ( ImportException, - convert_headers_to_snake_case, validate_using_form, ) from home.import_helpers import ( @@ -116,17 +115,9 @@ def parse_file(self) -> list["AssessmentRow"]: rows = [row for _, row in row_iterator] original_headers = rows[0].keys() - headers_mapping = convert_headers_to_snake_case(list(original_headers)) - snake_case_headers = list(headers_mapping.values()) - self.validate_headers(snake_case_headers, row_num=1) - transformed_rows = [ - {headers_mapping[key]: value for key, value in row.items()} for row in rows - ] + self.validate_headers(list(original_headers), row_num=1) - return [ - AssessmentRow.from_flat(row, i + 2) - for i, row in enumerate(transformed_rows) - ] + return [AssessmentRow.from_flat(row, i + 2) for i, row in enumerate(rows)] def set_progress(self, message: str, progress: int) -> None: self.progress_queue.put_nowait(progress) diff --git a/home/import_helpers.py b/home/import_helpers.py index f90b3ce7..c45355a2 100644 --- a/home/import_helpers.py +++ b/home/import_helpers.py @@ -25,6 +25,45 @@ from .xlsx_helpers import get_active_sheet +TYPO_KEYWORDS = [ + "question type", + "question-type", + "high result page", + "high-result-page", + "high inflection", + "high-inflection", + "medium result page", + "medium-result-page", + "medium inflection", + "medium-inflection", + "low result page", + "low-result-page", + "skip threshold", + "skip-threshold", + "skip high result page", + "skip-high-result-page", + "generic error", + "generic_error", + "answer semantic ids", + "answer-semantic-id", + "question semantic id", + "question-semantic-id", + "answer responses", + "answer-responses", +] +""" +List of keywords known to be common user typos or formatting inconsistencies. + +These keywords are identified as common variations or errors in user input that +should be corrected by converting them to snake_case format. The list contains +different representations of header titles from CMS-Forms for conversion to snake_case. + +Any additional keywords from Content Pages and other import applications that need +similar corrections should be appended to this list to maintain uniformity in data processing. +Contentset uses Pascal casing so changes to the application may be needed first before including +those variations in the list. +""" + class ImportException(Exception): """ @@ -192,29 +231,39 @@ def to_snake_case(s: str) -> str: def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]: """ - Fix keys for all rows by lowercasing keys and removing whitespace from keys and values + Fix keys for all rows by lowercasing keys, optionally converting to snake_case + if header text matches typo_keywords, and removing whitespace from keys and values. """ + try: first_row = next(rows) except StopIteration: return iter([]) - if len(first_row) != len(fix_row(first_row)): + if len(first_row) != len(fix_row(first_row, TYPO_KEYWORDS)): raise ImportException( "Invalid format. Please check that there are no duplicate headers." ) - yield fix_row(first_row) + yield fix_row(first_row, TYPO_KEYWORDS) for row in rows: - yield fix_row(row) + yield fix_row(row, TYPO_KEYWORDS) -def fix_row(row: dict[str, str | None]) -> dict[str, str | None]: +def fix_row(row: dict[str, str | None], keywords: list[str]) -> dict[str, str | None]: """ - Fix a single row by lowercasing the key and removing whitespace from the key and value + Fix a single row by lowercasing the key, converting it to snake_case + if it matches a typo_keyword, and removing whitespace from the key and value. """ try: - return {_normalise_key(k): _normalise_value(v) for k, v in row.items()} + return { + ( + to_snake_case(_normalise_key(k)) + if _normalise_key(k) in keywords + else _normalise_key(k) + ): _normalise_value(v) + for k, v in row.items() + } except AttributeError: raise ImportException( "Invalid format. Please check that all row values have headers." diff --git a/home/tests/import-export-data/assessments_missing_generic_error.csv b/home/tests/import-export-data/assessments_missing_generic_error.csv deleted file mode 100644 index d033a080..00000000 --- a/home/tests/import-export-data/assessments_missing_generic_error.csv +++ /dev/null @@ -1,6 +0,0 @@ -title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses -Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,,Is this a draft assessment,,,,,,,,draf-assessment, -Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, -Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, -Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,en,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, -Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/assessments_missing_locale.csv b/home/tests/import-export-data/assessments_missing_locale.csv deleted file mode 100644 index dd687497..00000000 --- a/home/tests/import-export-data/assessments_missing_locale.csv +++ /dev/null @@ -1,6 +0,0 @@ -title,question_type,tags,slug,version,locale,high_result_page,high_inflection,medium_result_page,medium_inflection,low_result_page,skip_threshold,skip_high_result_page,generic_error,question,explainer,error,min,max,answers,scores,answer_semantic_ids,question_semantic_id,answer_responses -Freetext Question,freetext_question,draft-assessment,Draft-assessment,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, -Random French,freetext_question,,random-french,v1.0,fr,high-inflection,5,,3,,0,,Sorry we didn't quite get that.,What do you not like about France,We need to know this,,,,,,,france-notlike, -Integer Question,integer_question,test-min-max-range,test-min-max-range,v1.0,en,,5,,3,,0,,This is a generic error,Lowest temeprature you're experienced,We need to know some things,This is an error message,0,30,,,,lowest-temperature, -Weather Trivia,integer_question,weather-trivia,weather-trivia,v1.0,,high-inflection,5,medium-score,1,low-score,0,,"Sorry, we didn't quite get that.",What's the coldest weather you're experienced?,We need to know some things,Your reply should be between {min} and {max},50,70,,,,coldest-weather, -Draft Assessment 2,freetext_question,draft-assessment,Draft-assessment-2,v1.0,en,,,,,,0,,This is a generic error for draft page,Is this a draft assessment,,,,,,,,draf-assessment, diff --git a/home/tests/import-export-data/broken_assessment.csv b/home/tests/import-export-data/broken_assessment.csv index d25aa76a..52f3da9c 100644 --- a/home/tests/import-export-data/broken_assessment.csv +++ b/home/tests/import-export-data/broken_assessment.csv @@ -1,2 +1,2 @@ this,is,not,a,valid,content,csv -"For real, it's totally not.",,,,,, +"For real, it's totally not." diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py index 4c7568e4..0645076b 100644 --- a/home/tests/test_assessment_import_export.py +++ b/home/tests/test_assessment_import_export.py @@ -678,29 +678,6 @@ def test_missing_title(self, csv_impexp: ImportExport) -> None: assert e.value.message == "The import file is missing required fields: title" assert e.value.row_num == 4 - def test_missing_locale(self, csv_impexp: ImportExport) -> None: - """ - Importing a CSV with a missing locale field should return an error - that a locale is mmissing - """ - with pytest.raises(ImportAssessmentException) as e: - csv_impexp.import_file("assessments_missing_locale.csv") - assert e.value.message == "The import file is missing required fields: locale" - assert e.value.row_num == 5 - - def test_missing_generic_error(self, csv_impexp: ImportExport) -> None: - """ - Importing a CSV with a missing generic error field should return an error - that a generic error is mmissing - """ - with pytest.raises(ImportAssessmentException) as e: - csv_impexp.import_file("assessments_missing_generic_error.csv") - assert ( - e.value.message - == "The import file is missing required fields: generic_error" - ) - assert e.value.row_num == 2 - def test_empty_rows(self, csv_impexp: ImportExport) -> None: """ Importing an empty CSV should return an error that the