# Remove ModelConfig type as a temporary solution. (#2836)
# Description

In this PR we remove the type annotations from the evaluator constructors as a
temporary solution, until the AzureOpenAIModelConfiguration type in the
evaluator constructors is officially supported.
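
For illustration, a minimal usage sketch after this change. The constructor is simply untyped now; the `AzureOpenAIModelConfiguration` field names below are assumptions for illustration and are not part of this diff.

```python
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import CoherenceEvaluator

# The evaluator constructor no longer declares a parameter type, so any
# compatible configuration object can be passed; AzureOpenAIModelConfiguration
# still works at runtime. Field names/values below are placeholders.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    api_key="<api-key>",
    azure_deployment="<deployment-name>",
)
coherence = CoherenceEvaluator(model_config)
```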

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
nick863 authored Apr 17, 2024
1 parent ee002ba commit 8f043ed
Showing 13 changed files with 57 additions and 27 deletions.
@@ -11,15 +11,14 @@

 import numpy as np

-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import CoherenceEvaluator, FluencyEvaluator, GroundednessEvaluator, RelevanceEvaluator

 logger = logging.getLogger(__name__)


 class ChatEvaluator:
     def __init__(
-        self, model_config: AzureOpenAIModelConfiguration, eval_last_turn: bool = False, parallel: bool = True
+        self, model_config, eval_last_turn: bool = False, parallel: bool = True
     ):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -64,7 +63,7 @@ def __init__(
             FluencyEvaluator(model_config),
         ]

-    def __call__(self, *, conversation: List[Dict], **kwargs):
+    def __call__(self, *, conversation, **kwargs):
         """Evaluates chat scenario.
         :param conversation: The conversation to be evaluated. Each turn should have "role" and "content" keys.
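
A brief usage sketch of the ChatEvaluator shown above, assuming a `model_config` built as in the description's example. The import path and the shape of the returned result are assumptions; the conversation format ("role"/"content" per turn) comes from the docstring in the diff.

```python
from promptflow.evals.evaluators import ChatEvaluator  # import path assumed

# model_config: an AzureOpenAIModelConfiguration as in the earlier sketch.
chat_eval = ChatEvaluator(model_config, eval_last_turn=False, parallel=True)

conversation = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris is the capital of France."},
]

# conversation is keyword-only, matching the signature above; the call returns
# aggregated per-metric scores (exact result keys are not shown in this diff).
result = chat_eval(conversation=conversation)
```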
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class CoherenceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class HateUnfairnessEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for hate unfairness score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class SelfHarmEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for self harm score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class SexualEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for sexual score.
@@ -1,12 +1,10 @@
 from promptflow.client import load_flow
 from pathlib import Path
-from azure.core.credentials import TokenCredential
 from promptflow.evals._constants import EvaluationMetrics
-from typing import Optional


 class ViolenceEvaluator:
-    def __init__(self, project_scope: dict, credential: Optional[TokenCredential] = None):
+    def __init__(self, project_scope: dict, credential=None):
         """
         Initialize an evaluator for violence score.
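
For the four content-safety evaluators above, a minimal construction sketch. The `project_scope` keys are an assumption about the expected shape of that dict and are not defined in this diff.

```python
from promptflow.evals.evaluators import content_safety

# Assumed shape of project_scope, pointing at an Azure AI project.
project_scope = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

# credential now defaults to None and is no longer annotated with TokenCredential.
violence_eval = content_safety.ViolenceEvaluator(project_scope)

# An explicit Azure credential can still be passed, e.g.:
# from azure.identity import DefaultAzureCredential
# violence_eval = content_safety.ViolenceEvaluator(project_scope, credential=DefaultAzureCredential())
```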
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class FluencyEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class GroundednessEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -4,7 +4,6 @@

 __path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore

-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.evals.evaluators import (
     CoherenceEvaluator,
     F1ScoreEvaluator,
@@ -16,7 +15,7 @@


 class QAEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class RelevanceEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
@@ -7,12 +7,11 @@
 from pathlib import Path

 from promptflow.client import load_flow
-from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.core._prompty_utils import convert_model_configuration_to_connection


 class SimilarityEvaluator:
-    def __init__(self, model_config: AzureOpenAIModelConfiguration):
+    def __init__(self, model_config):
         """
         Initialize an evaluator configured for a specific Azure OpenAI model.
src/promptflow-evals/tests/evals/conftest.py (7 additions, 0 deletions)
@@ -6,6 +6,7 @@
 import pytest
 from pytest_mock import MockerFixture

+from promptflow.client import PFClient
 from promptflow.core import AzureOpenAIModelConfiguration
 from promptflow.executor._line_execution_process_pool import _process_wrapper
 from promptflow.executor._process_manager import create_spawned_fork_process_manager
@@ -72,6 +73,12 @@ def model_config() -> dict:
     return model_config


+@pytest.fixture
+def pf_client() -> PFClient:
+    """The fixture, returning PRClient"""
+    return PFClient()
+
+
 # ==================== Recording injection ====================
 # To inject patches in subprocesses, add new mock method in setup_recording_injection_if_enabled
 # in fork mode, this is automatically enabled.
src/promptflow-evals/tests/evals/unittests/test_save_eval.py (38 additions, 0 deletions)
@@ -0,0 +1,38 @@
+from typing import Any, List, Optional, Type
+
+import inspect
+import os
+import pytest
+
+from promptflow.evals import evaluators
+from promptflow.evals.evaluators import content_safety
+
+
+def get_evaluators_from_module(namespace: Any, exceptions: Optional[List[str]] = None) -> List[Type]:
+    evaluators = []
+    for name, obj in inspect.getmembers(namespace):
+        if inspect.isclass(obj):
+            if exceptions and name in exceptions:
+                continue
+            evaluators.append(obj)
+    return evaluators
+
+
+@pytest.mark.unittest
+class TestSaveEval:
+    """Test saving evaluators."""
+
+    EVALUATORS = get_evaluators_from_module(evaluators)
+    RAI_EVALUATORS = get_evaluators_from_module(content_safety)
+
+    @pytest.mark.parametrize('evaluator', EVALUATORS)
+    def test_save_evaluators(self, tmpdir, pf_client, evaluator) -> None:
+        """Test regular evaluator saving."""
+        pf_client.flows.save(evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
+
+    @pytest.mark.parametrize('rai_evaluator', RAI_EVALUATORS)
+    def test_save_rai_evaluators(self, tmpdir, pf_client, rai_evaluator):
+        """Test saving of RAI evaluators"""
+        pf_client.flows.save(rai_evaluator, path=tmpdir)
+        assert os.path.isfile(os.path.join(tmpdir, 'flow.flex.yaml'))
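
For context on what the new tests assert: saving an evaluator class through the `PFClient` writes a flex-flow definition (`flow.flex.yaml`) into the target directory. A standalone sketch of the same call outside pytest (evaluator choice and temporary-directory handling are illustrative):

```python
import os
import tempfile

from promptflow.client import PFClient
from promptflow.evals.evaluators import F1ScoreEvaluator

pf_client = PFClient()
with tempfile.TemporaryDirectory() as tmpdir:
    # Saving the evaluator class produces flow.flex.yaml, which is exactly
    # what the parametrized tests above check for.
    pf_client.flows.save(F1ScoreEvaluator, path=tmpdir)
    print(os.path.isfile(os.path.join(tmpdir, "flow.flex.yaml")))  # expected: True
```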
