# Remove ModelConfig type as a temporary solution. (#2836)
# Description

In this PR we remove the type annotations from the evaluator constructors as a
temporary solution, until the AzureOpenAIModelConfiguration type in the
evaluator constructors is officially supported.
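
For illustration, a minimal usage sketch after this change. The constructor is simply untyped now; the `AzureOpenAIModelConfiguration` field names below are assumptions for illustration and are not part of this diff.

```python
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import CoherenceEvaluator

# The evaluator constructor no longer declares a parameter type, so any
# compatible configuration object can be passed; AzureOpenAIModelConfiguration
# still works at runtime. Field names/values below are placeholders.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<deployment-name>",
)
coherence = CoherenceEvaluator(model_config)
```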

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
nick863 authored Apr 17, 2024
1 parent ee002ba commit 8f043ed
Showing 13 changed files with 57 additions and 27 deletions.
@@ -11,15 +11,14 @@

 import numpy as np

-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import CoherenceEvaluator, FluencyEvaluator, GroundednessEvaluator, RelevanceEvaluator

 logger = logging.getLogger(__name__)


 class ChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self, model_config, eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -64,7 +63,7 @@ def __init__(
             FluencyEvaluator(model_config),
         ]

-    def __call__(self, *, conversation: List[Dict], **kwargs):
+    def __call__(self, *, conversation, **kwargs):
         """Evaluates chat scenario.
         :param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
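
A brief usage sketch of the ChatEvaluator shown above, assuming a `model_config` built as in the description's example. The import path and the shape of the returned result are assumptions; the conversation format ("role"/"content" per turn) comes from the docstring in the diff.

```python
from promptflow.evals.evaluators import ChatEvaluator  # import path assumed

# model_config: an AzureOpenAIModelConfiguration as in the earlier sketch.
chat_eval = ChatEvaluator(model_config, eval_last_turn=False, parallel=True)

conversation = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris is the capital of France."},
]

# conversation is keyword-only, matching the signature above; the call returns
# aggregated per-metric scores (exact result keys are not shown in this diff).
result = chat_eval(conversation=conversation)
```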
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class CoherenceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class HateUnfairnessEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for hate unfairness score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class SelfHarmEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for self harm score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class SexualEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for sexual score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class ViolenceEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for violence score.
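
For the four content-safety evaluators above, a minimal construction sketch. The `project_scope` keys are an assumption about the expected shape of that dict and are not defined in this diff.

```python
from promptflow.evals.evaluators import content_safety

# Assumed shape of project_scope, pointing at an Azure AI project.
project_scope = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

# credential now defaults to None and is no longer annotated with TokenCredential.
violence_eval = content_safety.ViolenceEvaluator(project_scope)

# An explicit Azure credential can still be passed, e.g.:
# from azure.identity import DefaultAzureCredential
# violence_eval = content_safety.ViolenceEvaluator(project_scope, credential=DefaultAzureCredential())
```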
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class FluencyEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class GroundednessEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -4,7 +4,6 @@

 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore

-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import (
     CoherenceEvaluator,
     F1ScoreEvaluator,
@@ -16,7 +15,7 @@


 class QAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class RelevanceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class SimilarityEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
src/promptflow-evals/tests/evals/conftest.py (7 additions, 0 deletions)
@@ -6,6 +6,7 @@
 import pytest
 from pytest_mock import MockerFixture

+from promptflow.client import PFClient
 from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.executor._line_execution_process_pool import _process_wrapper
 from promptflow.executor._process_manager import create_spawned_fork_process_manager
@@ -72,6 +73,12 @@ def model_config() -> dict:
     return model_config


+@pytest.fixture
+def pf_client() -> PFClient:
+    """The fixture, returning PRClient"""
+    return PFClient()
+
+
 # ==================== Recording injection ====================
 # To inject patches in subprocesses, add new mock method in setup_recording_injection_if_enabled
 # in fork mode, this is automatically enabled.
src/promptflow-evals/tests/evals/unittests/test_save_eval.py (38 additions, 0 deletions)
@@ -0,0 +1,38 @@
+from typing import Any, List, Optional, Type
+
+import inspect
+import os
+import pytest
+
+from promptflow.evals import evaluators
+from promptflow.evals.evaluators import content_safety
+
+
+def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
+    evaluators = []
+    for name, obj in inspect.getmembers(namespace):
+        if inspect.isclass(obj):
+            if exceptions and name in exceptions:
+                continue
+            evaluators.append(obj)
+    return evaluators
+
+
+@pytest.mark.unittest
+class TestSaveEval:
+    """Test saving evaluators."""
+
+    EVALUATORS = get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
+
+    @pytest.mark.parametrize('evaluator', EVALUATORS)
+    def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
+        """Test regular evaluator saving."""
+        pf_client.flows.save(evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
+
+    @pytest.mark.parametrize('rai_evaluator', RAI_EVALUATORS)
+    def test_save_rai_evaluators(self, tmpdir, pf_client, rai_evaluator):
+        """Test saving of RAI evaluators"""
+        pf_client.flows.save(rai_evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
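
For context on what the new tests assert: saving an evaluator class through the `PFClient` writes a flex-flow definition (`flow.flex.yaml`) into the target directory. A standalone sketch of the same call outside pytest (evaluator choice and temporary-directory handling are illustrative):

```python
import os
import tempfile

from promptflow.client import PFClient
from promptflow.evals.evaluators import F1ScoreEvaluator

pf_client = PFClient()
with tempfile.TemporaryDirectory() as tmpdir:
    # Saving the evaluator class produces flow.flex.yaml, which is exactly
    # what the parametrized tests above check for.
    pf_client.flows.save(F1ScoreEvaluator, path=tmpdir)
    print(os.path.isfile(os.path.join(tmpdir, "flow.flex.yaml")))  # expected: True
```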
