From 8f043ed61a190a554758e2665084a557801b5b39 Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Wed, 17 Apr 2024 13:39:49 -0700
Subject: [PATCH] Remove ModelConfig type as a temporary solution. (#2836)

# Description
In this PR we remove the type annotations from the evaluator constructors as a temporary solution, until the AzureOpenAIModelConfiguration type is officially supported in the evaluator constructors.

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
---
 .../evals/evaluators/chat/__init__.py         |  5 +--
 .../evals/evaluators/coherence/__init__.py    |  3 +-
 .../content_safety/hate_unfairness.py         |  4 +-
 .../evaluators/content_safety/self_harm.py    |  4 +-
 .../evals/evaluators/content_safety/sexual.py |  4 +-
 .../evaluators/content_safety/violence.py     |  4 +-
 .../evals/evaluators/fluency/__init__.py      |  3 +-
 .../evals/evaluators/groundedness/__init__.py |  3 +-
 .../evals/evaluators/qa/__init__.py           |  3 +-
 .../evals/evaluators/relevance/__init__.py    |  3 +-
 .../evals/evaluators/similarity/__init__.py   |  3 +-
 src/promptflow-evals/tests/evals/conftest.py  |  7 ++++
 .../tests/evals/unittests/test_save_eval.py   | 38 +++++++++++++++++++
 13 files changed, 57 insertions(+), 27 deletions(-)
 create mode 100644 src/promptflow-evals/tests/evals/unittests/test_save_eval.py

diff --git a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
index 7878adf5c77..e2fc2b8066a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/chat/__init__.py
@@ -11,7 +11,6 @@
 
 import numpy as np
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import CoherenceEvaluator, FluencyEvaluator, GroundednessEvaluator, RelevanceEvaluator
 
 logger = logging.getLogger(__name__)
@@ -19,7 +18,7 @@
 
 class ChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self, model_config, eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -64,7 +63,7 @@ def __init__(
             FluencyEvaluator(model_config),
         ]
 
-    def __call__(self, *, conversation: List[Dict], **kwargs):
+    def __call__(self, *, conversation, **kwargs):
         """Evaluates chat scenario.
 
         :param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
index 2fb81de63b0..023a52845d8 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/coherence/__init__.py
@@ -7,12 +7,11 @@
 
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class CoherenceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
index 6cb9467533f..323df141d2e 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/hate_unfairness.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class HateUnfairnessEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for hate unfairness score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
index bb01f05dd55..b2ff8554bfd 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/self_harm.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SelfHarmEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for self harm score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
index fb0d3f79d71..b1247369cdc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/sexual.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class SexualEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for sexual score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
index 47382d2c330..29bc631c866 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/content_safety/violence.py
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional
 
 
 class ViolenceEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for violence score.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
index 54300057cf0..4d8fc742c03 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/fluency/__init__.py
@@ -7,12 +7,11 @@
 
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class FluencyEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
index f876a20c5bb..5023ee640cc 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/groundedness/__init__.py
@@ -7,12 +7,11 @@
 
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class GroundednessEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
index f8d27ad2675..09955b6da95 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/qa/__init__.py
@@ -4,7 +4,6 @@
 
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
 
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import (
     CoherenceEvaluator,
     F1ScoreEvaluator,
@@ -16,7 +15,7 @@
 
 
 class QAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
 
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
index 95d93a67f89..6d1d89ad68a 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/relevance/__init__.py
@@ -7,12 +7,11 @@
 
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class RelevanceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
index 58f27d786c8..a36bd032a1f 100644
--- a/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
+++ b/src/promptflow-evals/promptflow/evals/evaluators/similarity/__init__.py
@@ -7,12 +7,11 @@
 
 from pathlib import Path
 
 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection
 
 
 class SimilarityEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
diff --git a/src/promptflow-evals/tests/evals/conftest.py b/src/promptflow-evals/tests/evals/conftest.py
index 88a91288f84..006048bc063 100644
--- a/src/promptflow-evals/tests/evals/conftest.py
+++ b/src/promptflow-evals/tests/evals/conftest.py
@@ -6,6 +6,7 @@
 import pytest
 from pytest_mock import MockerFixture
 
+from promptflow.client import PFClient
 from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.executor._line_execution_process_pool import _process_wrapper
 from promptflow.executor._process_manager import create_spawned_fork_process_manager
@@ -72,6 +73,12 @@ def model_config() -> dict:
     return model_config
 
 
+@pytest.fixture
+def pf_client() -> PFClient:
+    """The fixture, returning PFClient."""
+    return PFClient()
+
+
 # ==================== Recording injection ====================
 # To inject patches in subprocesses, add new mock method in setup_recording_injection_if_enabled
 # in fork mode, this is automatically enabled.
diff --git a/src/promptflow-evals/tests/evals/unittests/test_save_eval.py b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
new file mode 100644
index 00000000000..4d997dc18f2
--- /dev/null
+++ b/src/promptflow-evals/tests/evals/unittests/test_save_eval.py
@@ -0,0 +1,38 @@
+from typing import Any, List, Optional, Type
+
+import inspect
+import os
+import pytest
+
+from promptflow.evals import evaluators
+from promptflow.evals.evaluators import content_safety
+
+
+def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
+    evaluators = []
+    for name, obj in inspect.getmembers(namespace):
+        if inspect.isclass(obj):
+            if exceptions and name in exceptions:
+                continue
+            evaluators.append(obj)
+    return evaluators
+
+
+@pytest.mark.unittest
+class TestSaveEval:
+    """Test saving evaluators."""
+
+    EVALUATORS = get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
+
+    @pytest.mark.parametrize('evaluator', EVALUATORS)
+    def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
+        """Test regular evaluator saving."""
+        pf_client.flows.save(evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
+
+    @pytest.mark.parametrize('rai_evaluator', RAI_EVALUATORS)
+    def test_save_rai_evaluators(self, tmpdir, pf_client, rai_evaluator):
+        """Test saving of RAI evaluators"""
+        pf_client.flows.save(rai_evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
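
Usage sketch (illustration only, not part of the patch): the snippet below shows how the relaxed constructor and the new save test are expected to fit together. The endpoint, key, and deployment values are placeholders, and the assumption that callers keep passing an `AzureOpenAIModelConfiguration` instance (now without the annotation enforcing it) reflects the intent described above rather than anything added by this PR.

```python
# Illustration only; endpoint, key, and deployment values are placeholders.
# The untyped constructor call mirrors the change in this PR, and the save
# call mirrors the new unit test in test_save_eval.py.
from promptflow.client import PFClient
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import CoherenceEvaluator

# The constructor no longer annotates model_config, but passing an
# AzureOpenAIModelConfiguration is still the intended usage.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com/",  # placeholder
    api_key="<your-api-key>",                                     # placeholder
    azure_deployment="<your-deployment>",                         # placeholder
)
evaluator = CoherenceEvaluator(model_config)

# Saving an evaluator class as a flex flow, as test_save_eval.py does;
# the save produces a flow.flex.yaml under the target path.
pf_client = PFClient()
pf_client.flows.save(CoherenceEvaluator, path="./coherence_evaluator_flow")
```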