
Commit

fix ensure_ascii for json.dumps
ifsheldon committed Aug 16, 2024
1 parent d5b60bb commit 39e0f16
Showing 7 changed files with 12 additions and 12 deletions.
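
For context (not part of the commit itself): Python's json.dumps escapes every non-ASCII character to a \uXXXX sequence by default, while ensure_ascii=False keeps the characters verbatim, which matters for non-English prompts and metadata. A minimal sketch of the difference:

import json

data = {"question": "什么是机器学习？"}  # sample non-ASCII (Chinese) payload

# Default behaviour: every non-ASCII character is escaped to \uXXXX.
print(json.dumps(data))
# -> {"question": "\u4ec0\u4e48\u662f\u673a\u5668\u5b66\u4e60\uff1f"}

# With ensure_ascii=False the characters are emitted verbatim,
# keeping serialized prompts and metadata human-readable.
print(json.dumps(data, ensure_ascii=False))
# -> {"question": "什么是机器学习？"}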
@@ -25,7 +25,7 @@ async def aextract(self, node: t.Union[Node, LCDocument]) -> t.Any:
                 return await self.aextract_text(node.properties[self.attribute])
             elif self.attribute in node.properties["metadata"]:
                 return await self.aextract_text(
-                    json.dumps(node.properties["metadata"][self.attribute])
+                    json.dumps(node.properties["metadata"][self.attribute], ensure_ascii=False)
                 )
             else:
                 raise ValueError(f"Attribute {self.attribute} not found in node")
@@ -34,7 +34,7 @@ async def aextract(self, node: t.Union[Node, LCDocument]) -> t.Any:
                 return await self.aextract_text(node.page_content)
             elif self.attribute in node.metadata:
                 return await self.aextract_text(
-                    json.dumps(node.metadata[self.attribute])
+                    json.dumps(node.metadata[self.attribute], ensure_ascii=False)
                 )
             else:
                 raise ValueError(f"Attribute {self.attribute} not found in node")
@@ -45,15 +45,15 @@ def extract(self, node: t.Union[Node, LCDocument]) -> t.Any:
                 return self.extract_text(node.properties[self.attribute])
             elif self.attribute in node.properties["metadata"]:
                 return self.extract_text(
-                    json.dumps(node.properties["metadata"][self.attribute])
+                    json.dumps(node.properties["metadata"][self.attribute], ensure_ascii=False)
                 )
             else:
                 raise ValueError(f"Attribute {self.attribute} not found in node")
         elif isinstance(node, LCDocument):
             if self.attribute == "page_content":
                 return self.extract_text(node.page_content)
             elif self.attribute in node.metadata:
-                return self.extract_text(json.dumps(node.metadata[self.attribute]))
+                return self.extract_text(json.dumps(node.metadata[self.attribute], ensure_ascii=False))
             else:
                 raise ValueError(f"Attribute {self.attribute} not found in node")

@@ -237,7 +237,7 @@ async def retrieve_chunks(

         query = LEAF_NODE_QUERY
         leaf_nodes = [
-            self.query_nodes(query, {"id": json.dumps(id)}) for id in node_ids
+            self.query_nodes(query, {"id": json.dumps(id, ensure_ascii=False)}) for id in node_ids
         ]
         leaf_nodes = [node for nodes in leaf_nodes for node in nodes]
         if leaf_nodes is None:
@@ -485,7 +485,7 @@ async def retrieve_chunks(

         query = LEAF_NODE_QUERY
         leaf_nodes = [
-            self.query_nodes(query, {"id": json.dumps(id)}) for id in node_ids
+            self.query_nodes(query, {"id": json.dumps(id, ensure_ascii=False)}) for id in node_ids
         ]
         leaf_nodes = [node for nodes in leaf_nodes for node in nodes]
         leaf_nodes = [
2 changes: 1 addition & 1 deletion src/ragas/_analytics.py
@@ -74,7 +74,7 @@ def get_userid() -> str:
         user_id = "a-" + uuid.uuid4().hex
         os.makedirs(user_id_path)
         with open(uuid_filepath, "w") as f:
-            json.dump({"userid": user_id}, f)
+            json.dump({"userid": user_id}, f, ensure_ascii=False)
     return user_id


2 changes: 1 addition & 1 deletion src/ragas/llms/output_parser.py
@@ -47,7 +47,7 @@ def get_json_format_instructions(pydantic_object: t.Type[TBaseModel]) -> str:
     if "title" in reduced_schema:
         del reduced_schema["title"]
     # Ensure json in context is well-formed with double quotes.
-    schema_str = json.dumps(reduced_schema)
+    schema_str = json.dumps(reduced_schema, ensure_ascii=False)

     resp = JSON_FORMAT_INSTRUCTIONS.format(schema=schema_str)
     return resp
4 changes: 2 additions & 2 deletions src/ragas/llms/prompt.py
@@ -160,7 +160,7 @@ def format(self, **kwargs: t.Any) -> PromptValue:
             )
         for key, value in kwargs.items():
             if isinstance(value, str):
-                kwargs[key] = json.dumps(value)
+                kwargs[key] = json.dumps(value, ensure_ascii=False)

         prompt = self.to_string()
         return PromptValue(prompt_str=prompt.format(**kwargs))
@@ -277,7 +277,7 @@ def save(self, cache_dir: t.Optional[str] = None):

         cache_path = os.path.join(cache_dir, f"{self.name}.json")
         with open(cache_path, "w") as file:
-            json.dump(self.dict(), file, indent=4)
+            json.dump(self.dict(), file, indent=4, ensure_ascii=False)

     @classmethod
     def _load(cls, language: str, name: str, cache_dir: str) -> Prompt:
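
Side note, not part of this commit: with ensure_ascii=False, json.dump writes non-ASCII characters verbatim, so writes to disk such as the save above depend on the platform's default text encoding. A hedged sketch of pinning the encoding explicitly (the encoding argument is an illustration, not something this change adds):

with open(cache_path, "w", encoding="utf-8") as file:
    # encoding="utf-8" guarantees the raw (non-escaped) characters land in a UTF-8 file
    # regardless of the platform's locale default.
    json.dump(self.dict(), file, indent=4, ensure_ascii=False)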
2 changes: 1 addition & 1 deletion src/ragas/metrics/_faithfulness.py
@@ -202,7 +202,7 @@ def _create_nli_prompt(self, row: t.Dict, statements: t.List[str]) -> PromptValue:
         contexts = row["contexts"]
         # check if the statements are support in the contexts
         contexts_str: str = "\n".join(contexts)
-        statements_str: str = json.dumps(statements)
+        statements_str: str = json.dumps(statements, ensure_ascii=False)
         prompt_value = self.nli_statements_message.format(
             context=contexts_str, statements=statements_str
         )
2 changes: 1 addition & 1 deletion tests/unit/test_analytics.py
@@ -90,7 +90,7 @@ def test_load_userid_from_json_file(tmp_path, monkeypatch):
     with open(userid_filepath, "w") as f:
         import json

-        json.dump({"userid": "test-userid"}, f)
+        json.dump({"userid": "test-userid"}, f, ensure_ascii=False)

     from ragas._analytics import get_userid

