Skip to content

Commit

Permalink
specific list of characters we convert and remove unneeded tests
Browse files Browse the repository at this point in the history
  • Loading branch information
DevChima committed Feb 18, 2025
1 parent 3206672 commit 3b890da
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 54 deletions.
13 changes: 2 additions & 11 deletions home/import_assessments.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

from home.import_helpers import (
ImportException,
convert_headers_to_snake_case,
validate_using_form,
)
from home.import_helpers import (
Expand Down Expand Up @@ -116,17 +115,9 @@ def parse_file(self) -> list["AssessmentRow"]:
rows = [row for _, row in row_iterator]

original_headers = rows[0].keys()
headers_mapping = convert_headers_to_snake_case(list(original_headers))
snake_case_headers = list(headers_mapping.values())
self.validate_headers(snake_case_headers, row_num=1)
transformed_rows = [
{headers_mapping[key]: value for key, value in row.items()} for row in rows
]
self.validate_headers(list(original_headers), row_num=1)

return [
AssessmentRow.from_flat(row, i + 2)
for i, row in enumerate(transformed_rows)
]
return [AssessmentRow.from_flat(row, i + 2) for i, row in enumerate(rows)]

def set_progress(self, message: str, progress: int) -> None:
self.progress_queue.put_nowait(progress)
Expand Down
63 changes: 56 additions & 7 deletions home/import_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,45 @@

from .xlsx_helpers import get_active_sheet

TYPO_KEYWORDS = [
    "question type",
    "question-type",
    "high result page",
    "high-result-page",
    "high inflection",
    "high-inflection",
    "medium result page",
    "medium-result-page",
    "medium inflection",
    "medium-inflection",
    "low result page",
    "low-result-page",
    "skip threshold",
    "skip-threshold",
    "skip high result page",
    "skip-high-result-page",
    "generic error",
    # The hyphenated variant was missing: only the already-snake_case
    # "generic_error" was listed, so "generic-error" was never corrected.
    "generic-error",
    "generic_error",
    "answer semantic ids",
    # The plural hyphenated variant was missing; the singular entry below is
    # kept so that headers accepted before this change behave the same.
    "answer-semantic-ids",
    "answer-semantic-id",
    "question semantic id",
    "question-semantic-id",
    "answer responses",
    "answer-responses",
]
"""
Header spellings that are known, common user typos or formatting inconsistencies.

Each entry is a lowercase variant of a CMS-Forms header title, written with
spaces or hyphens instead of underscores. A header whose normalised form
matches an entry is converted to snake_case; headers that do not match are
left as they are.

Keywords from Content Pages and other import applications that need the same
correction should be appended to this list so that data processing stays
uniform. Contentset uses Pascal casing, so the application may need to change
before those variations can be included here.
"""


class ImportException(Exception):
"""
Expand Down Expand Up @@ -192,29 +231,39 @@ def to_snake_case(s: str) -> str:

def fix_rows(rows: Iterator[dict[str | Any, Any]]) -> Iterator[dict[str, str | None]]:
"""
Fix keys for all rows by lowercasing keys and removing whitespace from keys and values
Fix keys for all rows by lowercasing keys, optionally converting to snake_case
if header text matches typo_keywords, and removing whitespace from keys and values.
"""

try:
first_row = next(rows)
except StopIteration:
return iter([])

if len(first_row) != len(fix_row(first_row)):
if len(first_row) != len(fix_row(first_row, TYPO_KEYWORDS)):
raise ImportException(
"Invalid format. Please check that there are no duplicate headers."
)
yield fix_row(first_row)
yield fix_row(first_row, TYPO_KEYWORDS)

for row in rows:
yield fix_row(row)
yield fix_row(row, TYPO_KEYWORDS)


def fix_row(row: dict[str, str | None]) -> dict[str, str | None]:
def fix_row(row: dict[str, str | None], keywords: list[str]) -> dict[str, str | None]:
"""
Fix a single row by lowercasing the key and removing whitespace from the key and value
Fix a single row by lowercasing the key, converting it to snake_case
if it matches a typo_keyword, and removing whitespace from the key and value.
"""
try:
return {_normalise_key(k): _normalise_value(v) for k, v in row.items()}
return {
(
to_snake_case(_normalise_key(k))
if _normalise_key(k) in keywords
else _normalise_key(k)
): _normalise_value(v)
for k, v in row.items()
}
except AttributeError:
raise ImportException(
"Invalid format. Please check that all row values have headers."
Expand Down

This file was deleted.

6 changes: 0 additions & 6 deletions home/tests/import-export-data/assessments_missing_locale.csv

This file was deleted.

2 changes: 1 addition & 1 deletion home/tests/import-export-data/broken_assessment.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
this,is,not,a,valid,content,csv
"For real, it's totally not.",,,,,,
"For real, it's totally not."
23 changes: 0 additions & 23 deletions home/tests/test_assessment_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,29 +678,6 @@ def test_missing_title(self, csv_impexp: ImportExport) -> None:
assert e.value.message == "The import file is missing required fields: title"
assert e.value.row_num == 4

def test_missing_locale(self, csv_impexp: ImportExport) -> None:
"""
Importing a CSV with a missing locale field should return an error
that a locale is missing
"""
with pytest.raises(ImportAssessmentException) as e:
csv_impexp.import_file("assessments_missing_locale.csv")
assert e.value.message == "The import file is missing required fields: locale"
assert e.value.row_num == 5

def test_missing_generic_error(self, csv_impexp: ImportExport) -> None:
"""
Importing a CSV with a missing generic error field should return an error
that a generic error is missing
"""
with pytest.raises(ImportAssessmentException) as e:
csv_impexp.import_file("assessments_missing_generic_error.csv")
assert (
e.value.message
== "The import file is missing required fields: generic_error"
)
assert e.value.row_num == 2

def test_empty_rows(self, csv_impexp: ImportExport) -> None:
"""
Importing an empty CSV should return an error that the
Expand Down

0 comments on commit 3b890da

Please sign in to comment.