diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
index 9437541d082..4fd030eaa47 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -269,12 +269,13 @@ def evaluate(
     target: Optional[Callable] = None,
     data: Optional[str] = None,
     evaluators: Optional[Dict[str, Callable]] = None,
-    evaluator_config: Optional[Dict[str, Dict[str, str]]] = {},
+    evaluator_config: Optional[Dict[str, Dict[str, str]]] = None,
     azure_ai_project: Optional[Dict] = None,
     output_path: Optional[str] = None,
     **kwargs,
 ):
-    """Evaluates target or data with built-in evaluation metrics
+    """Evaluates target or data with built-in or custom evaluators. If both target and data are provided,
+    the data will be run through the target function and the results will then be evaluated.
 
     :keyword evaluation_name: Display name of the evaluation.
     :paramtype evaluation_name: Optional[str]
@@ -283,14 +284,62 @@ def evaluate(
     :keyword data: Path to the data to be evaluated or passed to target if target is set.
         Only .jsonl format files are supported. `target` and `data` both cannot be None
     :paramtype data: Optional[str]
-    :keyword evaluator_config: Configuration for evaluators.
+    :keyword evaluators: Evaluators to be used for evaluation. It should be a dictionary with an alias for each
+        evaluator as key and the evaluator function as value.
+    :paramtype evaluators: Optional[Dict[str, Callable]]
+    :keyword evaluator_config: Configuration for evaluators. The configuration should be a dictionary with evaluator
+        names as keys and dictionaries of column mappings as values. The column mappings should be dictionaries
+        with the column names expected by the evaluator as keys and the column names in the input data or the data
+        generated by the target as values.
     :paramtype evaluator_config: Optional[Dict[str, Dict[str, str]]
-    :keyword output_path: The local folder path to save evaluation artifacts to if set
+    :keyword output_path: The local folder or file path to save evaluation results to if set. If a folder path is
+        provided, the results will be saved to a file named `evaluation_results.json` in that folder.
     :paramtype output_path: Optional[str]
-    :keyword azure_ai_project: Logs evaluation results to AI Studio
+    :keyword azure_ai_project: Logs evaluation results to AI Studio if set.
     :paramtype azure_ai_project: Optional[Dict]
-    :return: A EvaluationResult object.
-    :rtype: ~azure.ai.generative.evaluate.EvaluationResult
+    :return: Evaluation results.
+    :rtype: dict
+
+    :Example:
+
+    The evaluate API can be used as follows:
+
+    .. code-block:: python
+
+        import os
+        from promptflow.core import AzureOpenAIModelConfiguration
+        from promptflow.evals.evaluate import evaluate
+        from promptflow.evals.evaluators import RelevanceEvaluator, CoherenceEvaluator
+
+        model_config = AzureOpenAIModelConfiguration(
+            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+            api_key=os.environ.get("AZURE_OPENAI_KEY"),
+            azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT")
+        )
+
+        coherence_eval = CoherenceEvaluator(model_config=model_config)
+        relevance_eval = RelevanceEvaluator(model_config=model_config)
+
+        path = "evaluate_test_data.jsonl"
+        result = evaluate(
+            data=path,
+            evaluators={
+                "coherence": coherence_eval,
+                "relevance": relevance_eval,
+            },
+            evaluator_config={
+                "coherence": {
+                    "answer": "${data.answer}",
+                    "question": "${data.question}"
+                },
+                "relevance": {
+                    "answer": "${data.answer}",
+                    "context": "${data.context}",
+                    "question": "${data.question}"
+                }
+            }
+        )
+
     """
 
     trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
@@ -298,6 +347,8 @@ def evaluate(
     input_data_df = _validate_and_load_data(target, data, evaluators, output_path, azure_ai_project, evaluation_name)
 
     # Process evaluator config to replace ${target.} with ${data.}
+    if evaluator_config is None:
+        evaluator_config = {}
     evaluator_config = _process_evaluator_config(evaluator_config)
 
     _validate_columns(input_data_df, evaluators, target, evaluator_config)
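
Note on the signature change: moving the `evaluator_config` default from `{}` to `None`, together with the `if evaluator_config is None: evaluator_config = {}` guard added before `_process_evaluator_config`, is the standard fix for Python's mutable-default-argument pitfall: a default `{}` is created once at function definition time and shared by every call that relies on it, so any in-place mutation downstream would silently carry over into later `evaluate` calls. A minimal standalone sketch of the pitfall and of the None-sentinel pattern used in the diff (generic Python, not promptflow code):

    # Why "def f(config={})" is risky: the default dict is created once,
    # at definition time, and shared by every call that relies on it.
    def buggy(config={}):
        config["calls"] = config.get("calls", 0) + 1
        return config

    # The None-sentinel pattern used in this diff: a fresh dict per call.
    def fixed(config=None):
        if config is None:
            config = {}
        config["calls"] = config.get("calls", 0) + 1
        return config

    print(buggy())  # {'calls': 1}
    print(buggy())  # {'calls': 2}  <- state leaked from the previous call
    print(fixed())  # {'calls': 1}
    print(fixed())  # {'calls': 1}  <- fresh dict every time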
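
The docstring example above only maps `${data.*}` columns. For the `target` path (the one the comment `# Process evaluator config to replace ${target.} with ${data.}` refers to), here is a hedged sketch. It assumes, which this diff does not show, that the target callable receives row fields as keyword arguments and returns a dict whose keys can be referenced as `${target.<key>}` in `evaluator_config`; the target function `answer_question` is hypothetical and stands in for the app under evaluation.

    import os

    from promptflow.core import AzureOpenAIModelConfiguration
    from promptflow.evals.evaluate import evaluate
    from promptflow.evals.evaluators import RelevanceEvaluator


    def answer_question(question: str) -> dict:
        # Hypothetical target: a real one would call the app being evaluated.
        return {"answer": f"stub answer for: {question}", "context": "stub context"}


    model_config = AzureOpenAIModelConfiguration(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_KEY"),
        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    )

    result = evaluate(
        data="evaluate_test_data.jsonl",  # rows supply "question"
        target=answer_question,           # generates "answer" and "context"
        evaluators={"relevance": RelevanceEvaluator(model_config=model_config)},
        evaluator_config={
            "relevance": {
                "question": "${data.question}",
                "answer": "${target.answer}",
                "context": "${target.context}",
            }
        },
    )

If that assumption about the target contract does not hold, treat the `${target.*}` mapping keys as illustrative only.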