fix: allow MTBenchBranchEvaluator to return a 2- or 3-sized tuple
This commit prepares for a change to the
eval library [1] that returns the overall score
from the MT-Bench-Branch judgement.

[1] instructlab/eval#138

Signed-off-by: Ali Maredia <[email protected]>
alimaredia authored and danmcp committed Sep 27, 2024
1 parent 832ebf0 commit 27f84e2
Showing 1 changed file with 9 additions and 1 deletion.
src/instructlab/model/evaluate.py (9 additions, 1 deletion)

@@ -659,9 +659,17 @@ def evaluate(
             for i, evaluator in enumerate(evaluators):
                 branch = branches[i]
                 print(f"Evaluating answers for branch {branch}...")
-                qa_pairs, error_rate = evaluator.judge_answers(
+                judgement = evaluator.judge_answers(
                     api_base, max_workers=max_workers, serving_gpus=effective_gpus
                 )
+
+                if len(judgement) == 3:
+                    qa_pairs = judgement[1]
+                    error_rate = judgement[2]
+                else:
+                    qa_pairs = judgement[0]
+                    error_rate = judgement[1]
+
                 qa_pairs_and_errors.append((qa_pairs, error_rate))
         finally:
             if server is not None:
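For reference, a minimal standalone sketch of the tuple handling introduced above. The helper name unpack_judgement and the assumed 3-tuple ordering (overall_score, qa_pairs, error_rate) are illustrative, inferred from the commit message and the judgement[1]/judgement[2] indexing in the diff; they are not part of the eval library's documented API.

# Illustrative sketch only; not part of this commit.
def unpack_judgement(judgement):
    """Return (qa_pairs, error_rate) from a 2- or 3-sized judge_answers() result."""
    if len(judgement) == 3:
        # Assumed newer eval library shape: (overall_score, qa_pairs, error_rate)
        _overall_score, qa_pairs, error_rate = judgement
    else:
        # Existing shape: (qa_pairs, error_rate)
        qa_pairs, error_rate = judgement
    return qa_pairs, error_rate

# Hypothetical usage, equivalent to the if/else block in the diff above:
# qa_pairs, error_rate = unpack_judgement(
#     evaluator.judge_answers(api_base, max_workers=max_workers, serving_gpus=effective_gpus)
# )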
