diff --git a/docs/howtos/customisations/aws-bedrock.ipynb b/docs/howtos/customisations/aws-bedrock.ipynb index 8a067f17e..cfa9459c0 100644 --- a/docs/howtos/customisations/aws-bedrock.ipynb +++ b/docs/howtos/customisations/aws-bedrock.ipynb @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "f17bcf9d", "metadata": {}, "outputs": [], @@ -65,6 +65,7 @@ " faithfulness,\n", " context_recall,\n", ")\n", + "from ragas.metrics.critique import harmfulness\n", "\n", "# list of metrics we're going to use\n", "metrics = [\n", @@ -72,6 +73,7 @@ " answer_relevancy,\n", " context_recall,\n", " context_precision,\n", + " harmfulness\n", "]" ] }, @@ -87,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "40406a26", "metadata": {}, "outputs": [], @@ -135,7 +137,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "52d9f5f3", "metadata": {}, "outputs": [], @@ -156,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "22eb6f97", "metadata": {}, "outputs": [ @@ -171,7 +173,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:45<00:00, 52.69s/it]\n" + "100%|█████████████████████████████████████████████████████████████| 2/2 [01:22<00:00, 41.24s/it]\n" ] }, { @@ -185,7 +187,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:18<00:00, 39.08s/it]\n" + "100%|█████████████████████████████████████████████████████████████| 2/2 [01:21<00:00, 40.59s/it]\n" ] }, { @@ -199,7 +201,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [01:22<00:00, 41.10s/it]\n" + "100%|█████████████████████████████████████████████████████████████| 2/2 [00:46<00:00, 23.22s/it]\n" ] }, { @@ -213,16 +215,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:51<00:00, 25.86s/it]\n" + "100%|█████████████████████████████████████████████████████████████| 2/2 [00:59<00:00, 29.85s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "evaluating with [harmfulness]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████| 2/2 [00:33<00:00, 16.96s/it]\n" ] }, { "data": { "text/plain": [ - "{'ragas_score': 0.0000, 'faithfulness': 0.9380, 'answer_relevancy': 0.7857, 'context_recall': 0.2287, 'context_precision': 0.0000}" + "{'faithfulness': 0.9428, 'answer_relevancy': 0.7860, 'context_recall': 0.2296, 'context_precision': 0.0000, 'harmfulness': 0.0000}" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -254,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "id": "8686bf53", "metadata": {}, "outputs": [ @@ -287,6 +303,7 @@ " answer_relevancy\n", " context_recall\n", " context_precision\n", + " harmfulness\n", " \n", " \n", " \n", @@ -300,6 +317,7 @@ " 0.930311\n", " 0.263158\n", " 0.0\n", + " 0\n", " \n", " \n", " 1\n", @@ -311,6 +329,7 @@ " 0.984122\n", " 0.363636\n", " 0.0\n", + " 0\n", " \n", " \n", " 2\n", @@ -319,9 +338,10 @@ " \\nYes, it is possible to have one EIN doing bu...\n", " [You're confusing a lot of things here. Compan...\n", " 1.0\n", - " 0.877362\n", + " 0.883872\n", " 0.363636\n", " 0.0\n", + " 0\n", " \n", " \n", " 3\n", @@ -330,9 +350,10 @@ " \\nApplying for and receiving business credit c...\n", " [\"I'm afraid the great myth of limited liabili...\n", " 1.0\n", - " 0.519469\n", + " 0.518287\n", " 0.363636\n", " 0.0\n", + " 0\n", " \n", " \n", " 4\n", @@ -344,6 +365,7 @@ " 0.779471\n", " 0.000000\n", " 0.0\n", + " 0\n", " \n", " \n", "\n", @@ -378,15 +400,15 @@ "3 [\"I'm afraid the great myth of limited liabili... 1.0 \n", "4 [You should probably consult an attorney. Howe... 1.0 \n", "\n", - " answer_relevancy context_recall context_precision \n", - "0 0.930311 0.263158 0.0 \n", - "1 0.984122 0.363636 0.0 \n", - "2 0.877362 0.363636 0.0 \n", - "3 0.519469 0.363636 0.0 \n", - "4 0.779471 0.000000 0.0 " + " answer_relevancy context_recall context_precision harmfulness \n", + "0 0.930311 0.263158 0.0 0 \n", + "1 0.984122 0.363636 0.0 0 \n", + "2 0.883872 0.363636 0.0 0 \n", + "3 0.518287 0.363636 0.0 0 \n", + "4 0.779471 0.000000 0.0 0 " ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -423,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py index ccddf6c86..94c1612ee 100644 --- a/src/ragas/metrics/base.py +++ b/src/ragas/metrics/base.py @@ -120,5 +120,7 @@ def init_model(self): """ self.llm.validate_api_key() if hasattr(self, "embeddings"): - self.embeddings = t.cast(RagasEmbeddings, self.embeddings) - self.embeddings.validate_api_key() + # since we are using Langchain Embeddings directly, we need to check this + if hasattr(self.embeddings, "validate_api_key"): + self.embeddings = t.cast(RagasEmbeddings, self.embeddings) + self.embeddings.validate_api_key() diff --git a/src/ragas/metrics/critique.py b/src/ragas/metrics/critique.py index f26eee56e..6636fc2b6 100644 --- a/src/ragas/metrics/critique.py +++ b/src/ragas/metrics/critique.py @@ -131,7 +131,8 @@ def _score_batch( else: score = answer_dict.get(response[0][-1]) - scores.append(score) + # patch for critique: force score to 0 if the answer is not Yes or No + scores.append(score if score is not None else 0) return scores