From aa4258010160102ed4943eadb76901dd5cfa7afe Mon Sep 17 00:00:00 2001
From: Hlamalani
Date: Wed, 5 Feb 2025 10:23:14 +0200
Subject: [PATCH] remove hidden characters in import files

---
 home/import_content_pages.py                  | 56 ++++++++++++++++---
 .../import-export-data/test_special_chars.csv | 25 +++++++++
 home/tests/test_content_import_export.py      |  6 ++
 3 files changed, 78 insertions(+), 9 deletions(-)
 create mode 100644 home/tests/import-export-data/test_special_chars.csv

diff --git a/home/import_content_pages.py b/home/import_content_pages.py
index 6c6aca0b..87462175 100644
--- a/home/import_content_pages.py
+++ b/home/import_content_pages.py
@@ -6,7 +6,10 @@
 from json.decoder import JSONDecodeError
 from queue import Queue
 from typing import Any
+import unicodedata
 from uuid import uuid4
+import re
+from dataclasses import replace
 
 from django.core.exceptions import ObjectDoesNotExist, ValidationError  # type: ignore
 from taggit.models import Tag  # type: ignore
@@ -96,25 +99,60 @@ def process_rows(self, rows: list["ContentRow"]) -> None:
         prev_locale: Locale | None = None
         for i, row in enumerate(rows, start=2):
             try:
-                if row.is_page_index:
-                    prev_locale = self._get_locale_from_row(row)
+                cleaned_row = self._clean_row_content(row)
+                if cleaned_row.is_page_index:
+                    prev_locale = self._get_locale_from_row(cleaned_row)
                     if self.locale and self.locale != prev_locale:
                         # This page index isn't for the locale we're importing, so skip it.
                         continue
-                    self.create_content_page_index_from_row(row)
-                elif row.is_content_page:
-                    self.create_shadow_content_page_from_row(row, i)
-                    prev_locale = self._get_locale_from_row(row)
-                elif row.is_variation_message:
-                    self.add_variation_to_shadow_content_page_from_row(row, prev_locale)
+                    self.create_content_page_index_from_row(cleaned_row)
+
+                elif cleaned_row.is_content_page:
+                    self.create_shadow_content_page_from_row(cleaned_row, i)
+                    prev_locale = self._get_locale_from_row(cleaned_row)
+
+                elif cleaned_row.is_variation_message:
+                    self.add_variation_to_shadow_content_page_from_row(cleaned_row, prev_locale)
+
                 else:
-                    self.add_message_to_shadow_content_page_from_row(row, prev_locale)
+                    self.add_message_to_shadow_content_page_from_row(cleaned_row, prev_locale)
+
             except ImportException as e:
                 e.row_num = i
                 e.slug = row.slug
                 e.locale = row.locale
                 raise e
 
+    def _clean_row_content(self, row: "ContentRow") -> "ContentRow":
+        """
+        Return a copy of `row` with hidden characters removed from its
+        string fields.
+
+        Control characters (other than newline, carriage return and tab),
+        Unicode line and paragraph separators such as U+2028, and byte
+        order marks are dropped, then surrounding whitespace is stripped.
+        Other invisible characters are kept on purpose: zero width joiners
+        and directional marks are needed by emoji sequences and by some
+        scripts, which is why this does not filter on `str.isprintable()`.
+        """
+        cleaned_data = {}
+
+        for row_field in fields(row):
+            value = getattr(row, row_field.name)
+            if isinstance(value, str):
+                cleaned_data[row_field.name] = "".join(
+                    char
+                    for char in value
+                    if char in "\n\r\t"
+                    or (
+                        char != "\ufeff"
+                        and unicodedata.category(char) not in ("Cc", "Zl", "Zp")
+                    )
+                ).strip()
+
+        # Fields that are not strings keep their original values.
+        return replace(row, **cleaned_data)
+
     def _get_locale_from_row(self, row: "ContentRow") -> Locale:
         if row.language_code:
             try:
diff --git a/home/tests/import-export-data/test_special_chars.csv b/home/tests/import-export-data/test_special_chars.csv
new file mode 100644
index 00000000..775d4971
--- /dev/null
+++ b/home/tests/import-export-data/test_special_chars.csv
@@ -0,0 +1,25 @@
+structure,message,page_id,Slug,parent,web_title,web_subtitle,web_body,whatsapp_title,whatsapp_body,whatsapp_template_name,whatsapp_template_category,example_values,variation_title,variation_body,list_title,list_items,sms_title,sms_body,ussd_title,ussd_body,messenger_title,messenger_body,viber_title,viber_body,translation_tag,tags,quick_replies,triggers,locale,next_prompt,buttons,image_link,doc_link,media_link,related_pages,footer,language_code +Menu 1,0,4,main-menu,,Main Menu,,,,,,,,,,,,,,,,,,,,a0b85075-d01b-46bf-8997-8591e87ba171,,,,English,,,,,,,, +Sub 1.1,1,5,main-menu-first-time-user,Main Menu,main menu first time user,,,main menu first time user,"🟩🟩🟩🟩🟩🟩🟩🟩 + +You have completed your registration! 🎉 🌟 ⭐ + +*Pregnancy info*🤰🏽 +{pregnancy_info_status} + +*Basic info* 👤 +{basic_info_status} + +*Personal info* 🗝️ +{personal_info_status} + +*Daily life*☀️ +{DMA_status} + +You can always edit it or add info in `My Profile` on the main menu.

What do you want to do next? 👇🏾",,UTILITY,[],,,,[],,,,,main menu first time user,"Welcome to HealthAlert 🌍 + +This is a messaging service created by the World Health Organization (WHO) that provides information on COVID-19 as well as emergency resources for disease outbreaks, natural, and man-made disasters. + +You can return to this main menu at any time by replying 🏠 + +Choose what you'd like to know more about by tapping a button below ⬇️",,,5892bccd-8025-419d-9a8e-a6a37b755dbf,menu,"Self-help🌬️, Settings⚙️, Health Info🏥",Main menu🏠,English,,[],,,,,, diff --git a/home/tests/test_content_import_export.py b/home/tests/test_content_import_export.py index 5c8f429c..bcba8d71 100644 --- a/home/tests/test_content_import_export.py +++ b/home/tests/test_content_import_export.py @@ -1665,6 +1665,12 @@ def test_language_code_import(self, csv_impexp: ImportExport) -> None: assert dst == src + def test_hidden_characters(self, csv_impexp: ImportExport) -> None: + """The U+2028 line separator must be absent from the parsed whatsapp_body.""" + csv_bytes = csv_impexp.import_file("test_special_chars.csv") + csv_dict = csv2dicts(csv_bytes) + assert '\u2028' not in csv_dict[1]['whatsapp_body'] + @pytest.mark.django_db class TestExport: """