From d61960f73acf8a1502c005489b64d4d52087e5f6 Mon Sep 17 00:00:00 2001
From: nick863 <30440255+nick863@users.noreply.github.com>
Date: Fri, 19 Apr 2024 10:35:53 -0700
Subject: [PATCH] Add draft Readme.MD (#2891)

# Description
Add draft Readme.MD to allow uploading the package to PyPI.

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
---
 src/promptflow-evals/README.md | 83 ++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/src/promptflow-evals/README.md b/src/promptflow-evals/README.md
index e69de29bb2d..eda04de22c4 100644
--- a/src/promptflow-evals/README.md
+++ b/src/promptflow-evals/README.md
@@ -0,0 +1,83 @@
+# Prompt flow evaluators
+
+[![Python package](https://img.shields.io/pypi/v/promptflow-evals)](https://pypi.org/project/promptflow-evals/)
+[![License: MIT](https://img.shields.io/github/license/microsoft/promptflow)](https://github.com/microsoft/promptflow/blob/main/LICENSE)
+
+## Introduction
+Evaluators are custom or prebuilt promptflow flows that measure the quality of the outputs from language models.
+
+## Usage
+Users can create evaluator runs on their local machine as shown in the example below:
+
+```python
+import os
+from pprint import pprint
+
+from promptflow.core import AzureOpenAIModelConfiguration
+from promptflow.evals.evaluate import evaluate
+from promptflow.evals.evaluators import RelevanceEvaluator
+from promptflow.evals.evaluators.content_safety import ViolenceEvaluator
+
+
+def answer_length(answer, **kwargs):
+    return {"value": len(answer)}
+
+
+if __name__ == "__main__":
+    # Built-in evaluators
+    # Initialize Azure OpenAI Connection
+    model_config = AzureOpenAIModelConfiguration(
+        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+        api_key=os.environ.get("AZURE_OPENAI_KEY"),
+        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
+    )
+
+    # Initializing Relevance Evaluator
+    relevance_eval = RelevanceEvaluator(model_config)
+
+    # Running Relevance Evaluator on a single input row
+    relevance_score = relevance_eval(
+        answer="The Alpine Explorer Tent is the most waterproof.",
+        context="From our product list,"
+        " the alpine explorer tent is the most waterproof."
+        " The Adventure Dining Table has higher weight.",
+        question="Which tent is the most waterproof?",
+    )
+
+    pprint(relevance_score)
+    # {'gpt_relevance': 5.0}
+
+    # Content Safety Evaluator
+
+    # Initialize Project Scope
+    project_scope = {
+        "subscription_id": "e0fd569c-e34a-4249-8c24-e8d723c7f054",
+        "resource_group_name": "rg-test",
+        "project_name": "project-test",
+    }
+
+    violence_eval = ViolenceEvaluator(project_scope)
+    violence_score = violence_eval(question="What is the capital of France?", answer="Paris.")
+    pprint(violence_score)
+    # {'violence': 'Safe',
+    #  'violence_reason': "The system's response is a straightforward factual answer "
+    #                     'to a geography question. There is no violent content or '
+    #                     'language present.',
+    #  'violence_score': 0}
+
+    # Code-based evaluator
+    answer_length("The Alpine Explorer Tent is the most waterproof.")
+    # {'value': 48}
+
+    # Using multiple evaluators together with the `evaluate` API
+
+    result = evaluate(
+        data="evaluate_test_data.jsonl",
+        evaluators={
+            "answer_length": answer_length,
+            "violence": violence_eval,
+        },
+    )
+
+    pprint(result)
+```
\ No newline at end of file