Merge branch 'main' into main

Kranium2002 · Oct 29, 2024 · d1b240b
2 parents a44bca4 + 9675d81
commit d1b240b
Show file tree

Hide file tree

Showing 4 changed files with 61 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -2,12 +2,13 @@
   <img alt="giskardlogo" src="https://raw.githubusercontent.com/giskard-ai/giskard/main/readme/giskard_logo.png#gh-light-mode-only">
   <img alt="giskardlogo" src="https://raw.githubusercontent.com/giskard-ai/giskard/main/readme/giskard_logo_green.png#gh-dark-mode-only">
 </p>
-<h1 align="center" weight='300' >The Evaluation & Testing framework for LLMs & ML models</h1>
-<h3 align="center" weight='300' >Control risks of performance, bias and security issues in AI models</h3>
+<h1 align="center" weight='300' >The Evaluation & Testing framework for AI systems</h1>
+<h3 align="center" weight='300' >Control risks of performance, bias and security issues in AI systems</h3>
 <div align="center">
 
   [![GitHub release](https://img.shields.io/github/v/release/Giskard-AI/giskard)](https://github.com/Giskard-AI/giskard/releases)
   [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Giskard-AI/giskard/blob/main/LICENSE)
+  [![Downloads](https://static.pepy.tech/badge/giskard/month)](https://pepy.tech/project/giskard)
   [![CI](https://github.com/Giskard-AI/giskard/actions/workflows/build-python.yml/badge.svg?branch=main)](https://github.com/Giskard-AI/giskard/actions/workflows/build-python.yml?query=branch%3Amain)
   [![Giskard on Discord](https://img.shields.io/discord/939190303397666868?label=Discord)](https://gisk.ar/discord)
 
@@ -16,9 +17,8 @@
 </div>
 <h3 align="center">
    <a href="https://docs.giskard.ai/en/stable/getting_started/index.html"><b>Docs</b></a> &bull;
-   <a href="https://www.giskard.ai/knowledge-categories/news/?utm_source=github&utm_medium=github&utm_campaign=github_readme&utm_id=readmeblog"><b>Blog</b></a> &bull;
   <a href="https://www.giskard.ai/?utm_source=github&utm_medium=github&utm_campaign=github_readme&utm_id=readmeblog"><b>Website</b></a> &bull;
-  <a href="https://gisk.ar/discord"><b>Discord</b></a>
+  <a href="https://gisk.ar/discord"><b>Community</b></a>
  </h3>
 <br />
 

diff --git a/giskard/rag/report.py b/giskard/rag/report.py
@@ -156,18 +156,24 @@ def save(self, folder_path: str):
 
         report_details = {"recommendation": self._recommendation}
         with open(path / "report_details.json", "w", encoding="utf-8") as f:
-            json.dump(report_details, f)
+            json.dump(report_details, f, ensure_ascii=False)
 
-        self._knowledge_base._knowledge_base_df.to_json(path / "knowledge_base.jsonl", orient="records", lines=True)
+        self._knowledge_base._knowledge_base_df.to_json(
+            path / "knowledge_base.jsonl", orient="records", lines=True, force_ascii=False
+        )
         with open(path / "knowledge_base_meta.json", "w", encoding="utf-8") as f:
-            json.dump(self._knowledge_base.get_savable_data(), f)
+            json.dump(self._knowledge_base.get_savable_data(), f, ensure_ascii=False)
 
         with open(path / "agent_answer.json", "w", encoding="utf-8") as f:
-            json.dump([{"message": output.message, "documents": output.documents} for output in self._model_outputs], f)
+            json.dump(
+                [{"message": output.message, "documents": output.documents} for output in self._model_outputs],
+                f,
+                ensure_ascii=False,
+            )
 
         if self._metrics_results is not None:
             with open(path / "metrics_results.json", "w", encoding="utf-8") as f:
-                json.dump(self._metrics_results, f)
+                json.dump(self._metrics_results, f, ensure_ascii=False)
 
     @classmethod
     def load(

diff --git a/giskard/rag/testset.py b/giskard/rag/testset.py
@@ -96,7 +96,7 @@ def save(self, path):
         path : str
             The path to the output JSONL file.
         """
-        self._dataframe.reset_index().to_json(path, orient="records", lines=True)
+        self._dataframe.reset_index().to_json(path, orient="records", lines=True, force_ascii=False)
 
     @classmethod
     def load(cls, path):

diff --git a/tests/rag/test_qa_testset.py b/tests/rag/test_qa_testset.py
@@ -87,6 +87,37 @@ def make_testset_samples():
     ]
 
 
+def make_swedish_testset_samples():
+    return [
+        QuestionSample(
+            id="1",
+            question="Vilken mjölk används för att göra Camembert?",
+            reference_answer="Komjölk används för att göra Camembert.",
+            reference_context="Camembert är en fuktig, mjuk, krämig, ytmognad ost av komjölk.",
+            conversation_history=[],
+            metadata={
+                "question_type": "enkel",
+                "color": "blå",
+                "topic": "Ost_1",
+                "seed_document_id": "1",
+            },
+        ),
+        QuestionSample(
+            id="2",
+            question="Varifrån kommer Scamorza?",
+            reference_answer="Scamorza kommer från södra Italien.",
+            reference_context="Scamorza är en ost av komjölk från södra Italien.",
+            conversation_history=[],
+            metadata={
+                "question_type": "enkel",
+                "color": "röd",
+                "topic": "Ost_1",
+                "seed_document_id": "2",
+            },
+        ),
+    ]
+
+
 def test_qa_testset_creation():
     question_samples = make_testset_samples()
     testset = QATestset(question_samples)
@@ -146,6 +177,20 @@ def test_qa_testset_saving_loading(tmp_path):
     )
 
 
+def test_qa_testset_saving_loading_swedish(tmp_path):
+    testset = QATestset(make_swedish_testset_samples())
+    path = tmp_path / "testset.jsonl"
+    testset.save(path)
+    loaded_testset = QATestset.load(path)
+
+    assert all(
+        [
+            original == loaded
+            for original, loaded in zip(testset._dataframe["metadata"], loaded_testset._dataframe["metadata"])
+        ]
+    )
+
+
 def test_metadata_value_retrieval():
     testset = QATestset(make_testset_samples())