Skip to content

Commit

Permalink
remove hidden characters in import files
Browse files Browse the repository at this point in the history
  • Loading branch information
Hlamallama committed Feb 5, 2025
1 parent dbde0a0 commit aa42580
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 9 deletions.
39 changes: 30 additions & 9 deletions home/import_content_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from json.decoder import JSONDecodeError
from queue import Queue
from typing import Any
import unicodedata
from uuid import uuid4
import re
from dataclasses import replace

from django.core.exceptions import ObjectDoesNotExist, ValidationError # type: ignore
from taggit.models import Tag # type: ignore
Expand Down Expand Up @@ -96,25 +99,43 @@ def process_rows(self, rows: list["ContentRow"]) -> None:
prev_locale: Locale | None = None
for i, row in enumerate(rows, start=2):
try:
if row.is_page_index:
prev_locale = self._get_locale_from_row(row)
cleaned_row = self._clean_row_content(row)
if cleaned_row.is_page_index:
prev_locale = self._get_locale_from_row(cleaned_row)
if self.locale and self.locale != prev_locale:
# This page index isn't for the locale we're importing, so skip it.
continue
self.create_content_page_index_from_row(row)
elif row.is_content_page:
self.create_shadow_content_page_from_row(row, i)
prev_locale = self._get_locale_from_row(row)
elif row.is_variation_message:
self.add_variation_to_shadow_content_page_from_row(row, prev_locale)
self.create_content_page_index_from_row(cleaned_row)

elif cleaned_row.is_content_page:
self.create_shadow_content_page_from_row(cleaned_row, i)
prev_locale = self._get_locale_from_row(cleaned_row)

elif cleaned_row.is_variation_message:
self.add_variation_to_shadow_content_page_from_row(cleaned_row, prev_locale)

else:
self.add_message_to_shadow_content_page_from_row(row, prev_locale)
self.add_message_to_shadow_content_page_from_row(cleaned_row, prev_locale)

except ImportException as e:
e.row_num = i
e.slug = row.slug
e.locale = row.locale
raise e

def _clean_row_content(self, row: "ContentRow") -> "ContentRow":
    """
    Return a copy of ``row`` with hidden characters removed from its text.

    Every ``str``-valued dataclass field is cleaned as follows, then stripped
    of leading/trailing whitespace:

    * printable characters and ``\\n``, ``\\r``, ``\\t`` are kept as they are;
    * non-ASCII space separators (Unicode category ``Zs``, e.g. U+00A0) become
      a plain space, so the words around them are not glued together;
    * line/paragraph separators (``Zl``/``Zp``, i.e. U+2028/U+2029) become
      ``\\n``, so the line break they stood for is kept;
    * ZERO WIDTH JOINER / NON-JOINER (U+200D/U+200C) are kept, because emoji
      sequences and some scripts depend on them;
    * every other non-printable character (controls, BOM, ...) is dropped.

    Non-string fields are carried over untouched. ``row`` itself is not
    modified.
    """
    # Imported locally so this method does not depend on which dataclasses
    # helpers the module imports at the top of the file.
    from dataclasses import fields, replace

    def clean(text: str) -> str:
        kept: list[str] = []
        for char in text:
            if char.isprintable() or char in "\n\r\t":
                kept.append(char)
                continue
            category = unicodedata.category(char)
            if category == "Zs":
                # isprintable() is False for every space except ASCII " ".
                kept.append(" ")
            elif category in ("Zl", "Zp"):
                kept.append("\n")
            elif char in "\u200c\u200d":
                kept.append(char)
            # Anything else is an invisible character: drop it.
        return "".join(kept).strip()

    cleaned_data = {}
    for field in fields(row):
        # replace() rejects changes to fields declared with init=False.
        if not field.init:
            continue
        value = getattr(row, field.name)
        if isinstance(value, str):
            cleaned_data[field.name] = clean(value)

    return replace(row, **cleaned_data)

def _get_locale_from_row(self, row: "ContentRow") -> Locale:
if row.language_code:
try:
Expand Down
25 changes: 25 additions & 0 deletions home/tests/import-export-data/test_special_chars.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
structure,message,page_id,Slug,parent,web_title,web_subtitle,web_body,whatsapp_title,whatsapp_body,whatsapp_template_name,whatsapp_template_category,example_values,variation_title,variation_body,list_title,list_items,sms_title,sms_body,ussd_title,ussd_body,messenger_title,messenger_body,viber_title,viber_body,translation_tag,tags,quick_replies,triggers,locale,next_prompt,buttons,image_link,doc_link,media_link,related_pages,footer,language_code
Menu 1,0,4,main-menu,,Main Menu,,,,,,,,,,,,,,,,,,,,a0b85075-d01b-46bf-8997-8591e87ba171,,,,English,,,,,,,,
Sub 1.1,1,5,main-menu-first-time-user,Main Menu,main menu first time user,,,main menu first time user,"🟩🟩🟩🟩🟩🟩🟩🟩

You have completed your registration! 🎉 🌟 ⭐

*Pregnancy info*🤰🏽
{pregnancy_info_status}

*Basic info* 👤
{basic_info_status}

*Personal info* 🗝️
{personal_info_status}

*Daily life*☀️
{DMA_status}

You can always edit it or add info in `My Profile` on the main menu.

What do you want to do next? 👇🏾",,UTILITY,[],,,,[],,,,,main menu first time user,"Welcome to HealthAlert 🌍

This is a messaging service created by the World Health Organization (WHO) that provides information on COVID-19 as well as emergency resources for disease outbreaks, natural, and man-made disasters.

You can return to this main menu at any time by replying 🏠

Choose what you'd like to know more about by tapping a button below ⬇️",,,5892bccd-8025-419d-9a8e-a6a37b755dbf,menu,"Self-help🌬️, Settings⚙️, Health Info🏥",Main menu🏠,English,,[],,,,,,
6 changes: 6 additions & 0 deletions home/tests/test_content_import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -1665,6 +1665,12 @@ def test_language_code_import(self, csv_impexp: ImportExport) -> None:
assert dst == src


def test_hidden_characters(self, csv_impexp: ImportExport) -> None:
    """
    Run the special-characters fixture through ``import_file`` and check that
    the ``whatsapp_body`` of the second parsed row contains no U+2028 (LINE
    SEPARATOR) character.

    NOTE(review): the assertion inspects whatever ``import_file`` returns;
    confirm that this reflects the cleaned, imported content rather than the
    raw fixture bytes.
    """
    parsed_rows = csv2dicts(csv_impexp.import_file("test_special_chars.csv"))
    whatsapp_body = parsed_rows[1]['whatsapp_body']
    assert '\u2028' not in whatsapp_body

@pytest.mark.django_db
class TestExport:
"""
Expand Down

0 comments on commit aa42580

Please sign in to comment.