Eval: pass experiment name to wandbot call; update eval config import

wandb · Jan 16, 2025 · 6d93e4f
1 parent ffcdc74
commit 6d93e4f
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 6 deletions.
diff --git a/src/wandbot/evaluation/eval/async_main.py b/src/wandbot/evaluation/eval/async_main.py
@@ -12,7 +12,7 @@
 from tqdm import tqdm
 
 import wandb
-from wandbot.evaluation.config import EvalConfig
+from wandbot.evaluation.eval_config import EvalConfig
 from wandbot.evaluation.eval.correctness import (
     CORRECTNESS_EVAL_TEMPLATE,
     WandbCorrectnessEvaluator,

diff --git a/src/wandbot/evaluation/weave_eval/eval.py b/src/wandbot/evaluation/weave_eval/eval.py
@@ -87,8 +87,8 @@ def parse_text_to_json(text):
 
 
 @weave.op
-async def get_record(question: str, language: str = "en") -> dict:
-    response = await get_answer(question, language=language)
+async def get_record(question: str, application: str = "api-eval", language: str = "en") -> dict:
+    response = await get_answer(question, application, language=language)
     response = json.loads(response)
 
     # Return default values if response is empty or missing fields
@@ -120,10 +120,11 @@ async def get_record(question: str, language: str = "en") -> dict:
 
 class WandbotModel(weave.Model):
     language: str = "en"
+    application: str = "api-eval"
 
     @weave.op
     async def predict(self, question: str) -> dict:
-        prediction = await get_record(question, language=self.language)
+        prediction = await get_record(question, application=self.application, language=self.language)
         return prediction
 
 @weave.op
@@ -183,7 +184,7 @@ def main():
     ]
     logger.info("Number of evaluation samples: %s", len(question_rows))
 
-    wandbot = WandbotModel(language=config.lang)
+    wandbot = WandbotModel(language=config.lang, application=config.experiment_name)
 
     wandbot_evaluator = Evaluation(
         name=config.evaluation_name,

diff --git a/src/wandbot/evaluation/weave_eval/log_data.py b/src/wandbot/evaluation/weave_eval/log_data.py
@@ -7,7 +7,7 @@
 import pandas as pd
 from weave import Dataset
 
-from wandbot.evaluation.config import EvalConfig
+from wandbot.evaluation.eval_config import EvalConfig
 
 config = EvalConfig()