Merge pull request #429 from praekeltfoundation/cms-forms-fix-xls-import

Cms forms fix xls import
praekeltfoundation · Feb 27, 2025 · 6d6c752 · 6d6c752
2 parents 4f05381 + df59179
commit 6d6c752
Show file tree

Hide file tree

Showing 4 changed files with 27 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Changed Assessments to CMS Forms
 - Validation for high_inflection, medium_inflection and score field on CMS Forms
 - Validation for interactive messages
+- Fix cms-forms xls import
 ### Removed
 - Locale field on exports
 - Menu app

diff --git a/home/import_helpers.py b/home/import_helpers.py
@@ -262,24 +262,33 @@ def read_csv(file_content: bytes) -> Iterator[dict[str, Any]]:
     return csv.DictReader(StringIO(file_content.decode()))
 
 
+def remove_trailing_nones(row: list[Any]) -> list[Any]:
+    while row and row[-1] is None:
+        row = row[:-1]
+    return row
+
+
+def clean_excel_cell(cell_value: str | float | datetime | None) -> str:
+    return "" if cell_value is None else str(cell_value).replace("_x000D", "").strip()
+
+
 def read_xlsx(file_content: bytes) -> Iterator[dict[str, Any]]:
     workbook = load_workbook(BytesIO(file_content), read_only=True, data_only=True)
     worksheet = get_active_sheet(workbook)
 
-    def clean_excel_cell(cell_value: str | float | datetime | None) -> str:
-        return str(cell_value).replace("_x000D", "").strip()
-
     first_row = next(worksheet.iter_rows(max_row=1, values_only=True))
     header = [clean_excel_cell(cell) if cell else None for cell in first_row]
+    header = remove_trailing_nones(header)
 
     for row in worksheet.iter_rows(min_row=2, values_only=True):
+        row = remove_trailing_nones(row)  # type: ignore # Mypy cannot guarantee row is a list; rows may be tuples so we bypass.
         r = {}
         if len(row) > len(header):
             raise ImportException(
                 "Invalid format. Please check that all row values have headers."
             )
-        for name, cell in zip(header, row):  # noqa: B905
-            if name and cell:
+        for name, cell in zip(header, row, strict=False):
+            if name:
                 r[name] = clean_excel_cell(cell)
         if r:
             yield r
diff --git a/home/tests/import-export-data/assessment_empty_values.xlsx b/home/tests/import-export-data/assessment_empty_values.xlsx
diff --git a/home/tests/test_assessment_import_export.py b/home/tests/test_assessment_import_export.py
@@ -526,6 +526,18 @@ def test_import_assessment_xlsx(self, xlsx_impexp: ImportExport) -> None:
         content_pages = Assessment.objects.all()
         assert len(content_pages) > 0
 
+    def test_import_assessment_empty_values_xlsx(
+        self, xlsx_impexp: ImportExport
+    ) -> None:
+        """
+        Importing an XLSX Assessments file where the corresponding cell is empty
+        should not break.
+        """
+        xlsx_impexp.import_content_file("assessment_results.xlsx", purge=False)
+        xlsx_impexp.import_file("assessment_empty_values.xlsx", purge=False)
+        content_pages = Assessment.objects.all()
+        assert len(content_pages) > 0
+
     def test_invalid_high_inflecton_format(self, xlsx_impexp: ImportExport) -> None:
         """
         Importing an xlsx with a comma in high inflecton value