diff --git a/src/ragas/metrics/_faithfulness.py b/src/ragas/metrics/_faithfulness.py index 1da97b955..d3f07d1cf 100644 --- a/src/ragas/metrics/_faithfulness.py +++ b/src/ragas/metrics/_faithfulness.py @@ -120,9 +120,12 @@ def _score_batch( ) score = score / len(list_statements[i]) else: - score = max(0, output.count("verdict: no")) / len( - list_statements[i] - ) + if 'verdict: no' in output or 'verdict: yes' in output: + score = max(0, output.count("verdict: no")) / len( + list_statements[i] + ) + else: # output from LLM is broken and we can't evaluate score. + score = 1 scores.append(1 - score)