From ad9ad8309a0c96a13cbdeb71049dcfd4fad98c27 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 23 Jul 2024 22:33:04 +0000 Subject: [PATCH 1/4] Use `python3` in README instructions The `python` symlink may be missing on a system; or even point to py2. We should use `python3` to be sure. (It's ok to use `python` inside a virtualenv though.) Signed-off-by: Ihar Hrachyshka --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c8d667..51023ba 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ scripts/infra/cloud-instance.sh ec2 ssh # Regardless of how you setup your instance git clone https://github.com/instructlab/taxonomy.git && pushd taxonomy && git branch rc && popd git clone --bare https://github.com/instructlab/eval.git && git clone eval.git/ && cd eval && git remote add syncrepo ../eval.git -python -m venv venv +python3 -m venv venv source venv/bin/activate pip install -r requirements.txt pip install -r requirements-dev.txt From 568dffb8d99e944b2792ac6edbd11d5a44ba3da8 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 31 Jul 2024 17:50:15 +0000 Subject: [PATCH 2/4] Introduce hard-fail case for openai error handling Before the patch, all errors from openai were handled by retrying up to API_MAX_RETRY times and returning $ERROR$ message at the last attempt. With this patch, if all attempts result in APIConnectionError, we raise a new EvalError exception. (If at least one of the previous attempts result in a different error, then we return $ERROR$ as usual.) Also, several errors are not expected to recover with a retry (400-404, 422). This patch makes them return $ERROR$ immediately without retrying. Closes: #77 Signed-off-by: Ihar Hrachyshka --- src/instructlab/eval/exceptions.py | 12 ++++++++ src/instructlab/eval/mt_bench_common.py | 40 +++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py index 1037d90..aa31151 100644 --- a/src/instructlab/eval/exceptions.py +++ b/src/instructlab/eval/exceptions.py @@ -107,3 +107,15 @@ def __init__(self, tasks_dir) -> None: super().__init__() self.tasks_dir = tasks_dir self.message = f"Invalid Tasks Dir: {tasks_dir}" + + +class OpenAIError(EvalError): + """ + Error raised when reply retrieval from OpenAI API fails. + Attributes + message error message to be printed on raise + """ + + def __init__(self) -> None: + super().__init__() + self.message = "Failed to receive a reply from API." diff --git a/src/instructlab/eval/mt_bench_common.py b/src/instructlab/eval/mt_bench_common.py index 86f9920..eb76279 100644 --- a/src/instructlab/eval/mt_bench_common.py +++ b/src/instructlab/eval/mt_bench_common.py @@ -17,6 +17,9 @@ from fastchat.model.model_adapter import get_conversation_template # type: ignore import openai +# First Party +from instructlab.eval import exceptions + # Local from .logger_config import setup_logger @@ -247,9 +250,15 @@ def play_a_match_single( return result +def _is_fatal_openai_error(e: openai.OpenAIError) -> bool: + return isinstance(e, openai.APIConnectionError) + + def chat_completion_openai( openai_client, model, conv, temperature, max_tokens, merge_system_user_message=False ) -> str: + output = None + for i in range(API_MAX_RETRY): try: messages = conv.to_openai_api_messages() @@ -269,15 +278,40 @@ def chat_completion_openai( temperature=temperature, max_tokens=max_tokens, ) - return response.choices[0].message.content - except openai.OpenAIError as e: + output = response.choices[0].message.content + break + except ( + # retry may help with these errors + openai.APIConnectionError, + openai.RateLimitError, # 429 + openai.InternalServerError, # >=500 + # NOTE: Errors listed below may need a revisit: we are not sure if + # it's ever helpful to retry them. Leaving them intact for now. + openai.AuthenticationError, # 401 + openai.PermissionDeniedError, # 403 + openai.NotFoundError, # 404 + ) as e: + if not _is_fatal_openai_error(e): + output = API_ERROR_OUTPUT # disable hard fail (never raise!) + # still, retry in the hope we'll get a successful reply if i == API_MAX_RETRY - 1: logger.error(e) break logger.debug(e) time.sleep(API_RETRY_SLEEP) + except ( + # retry won't fix these errors + openai.BadRequestError, # 400 + openai.UnprocessableEntityError, # 422 + ) as e: + logger.debug(e) + return API_ERROR_OUTPUT # immediately soft fail - return API_ERROR_OUTPUT + if output is None: + # not a single attempt was non-fatal; this is indicative of + # basic connectivity or server issue -> hard fail + raise exceptions.OpenAIError + return output def check_data(questions, model_answers, ref_answers, models, judges): From abf0f4115e5e7e9026a3407fa15b32e7b20b2f85 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 31 Jul 2024 21:04:05 +0000 Subject: [PATCH 3/4] Calculate messages for openai completion once Before the patch, we were calculating them on every retry attempt. The function is pure, so there is no good reason to repeat the calculation. This also simplifies the function a bit. Signed-off-by: Ihar Hrachyshka --- pyproject.toml | 3 ++ src/instructlab/eval/mt_bench_common.py | 46 ++++++++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b11c7bd..03faef9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,3 +96,6 @@ from-first = true # import-heading-firstparty=First Party # import-heading-localfolder=Local known-local-folder = ["tuning"] + +[tool.mypy] +ignore_missing_imports = true diff --git a/src/instructlab/eval/mt_bench_common.py b/src/instructlab/eval/mt_bench_common.py index eb76279..89dc58a 100644 --- a/src/instructlab/eval/mt_bench_common.py +++ b/src/instructlab/eval/mt_bench_common.py @@ -4,7 +4,7 @@ """ # Standard -from typing import Optional +from typing import Optional, TypedDict import ast import dataclasses import glob @@ -14,6 +14,7 @@ import time # Third Party +from fastchat import conversation from fastchat.model.model_adapter import get_conversation_template # type: ignore import openai @@ -254,23 +255,44 @@ def _is_fatal_openai_error(e: openai.OpenAIError) -> bool: return isinstance(e, openai.APIConnectionError) +# TODO: copied from instructlab (cli) utils module; consolidate somewhere? +class Message(TypedDict): + """ + Represents a message within an AI conversation. + """ + + content: str + # one of: "user", "assistant", or "system" + role: str + + +def _get_messages( + conv: conversation.Conversation, merge_system_user_message: bool +) -> list[Message]: + messages = conv.to_openai_api_messages() + if ( + (merge_system_user_message or conv.name == "mistral") + and messages[0]["role"] == "system" + and messages[1]["role"] == "user" + ): + messages[1]["content"] = messages[0]["content"] + "\n" + messages[1]["content"] + return messages[1:] + return messages + + def chat_completion_openai( - openai_client, model, conv, temperature, max_tokens, merge_system_user_message=False + openai_client, + model, + conv: conversation.Conversation, + temperature, + max_tokens, + merge_system_user_message: bool = False, ) -> str: output = None + messages = _get_messages(conv, merge_system_user_message) for i in range(API_MAX_RETRY): try: - messages = conv.to_openai_api_messages() - if ( - (merge_system_user_message or conv.name == "mistral") - and messages[0]["role"] == "system" - and messages[1]["role"] == "user" - ): - messages[1]["content"] = ( - messages[0]["content"] + "\n" + messages[1]["content"] - ) - messages = messages[1:] response = openai_client.chat.completions.create( model=model, messages=messages, From 7fbd87e969ead41c466705203bed278d307d0be2 Mon Sep 17 00:00:00 2001 From: Dan McPherson Date: Thu, 15 Aug 2024 22:15:15 -0400 Subject: [PATCH 4/4] Add to fatal exceptions and handle OpenAIError Signed-off-by: Dan McPherson --- src/instructlab/eval/exceptions.py | 6 +++--- src/instructlab/eval/mt_bench_common.py | 28 +++++++++++++++++-------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py index aa31151..b3c7ca5 100644 --- a/src/instructlab/eval/exceptions.py +++ b/src/instructlab/eval/exceptions.py @@ -109,13 +109,13 @@ def __init__(self, tasks_dir) -> None: self.message = f"Invalid Tasks Dir: {tasks_dir}" -class OpenAIError(EvalError): +class ModelServingAPIError(EvalError): """ - Error raised when reply retrieval from OpenAI API fails. + Error raised when reply retrieval from model serving fails. Attributes message error message to be printed on raise """ def __init__(self) -> None: super().__init__() - self.message = "Failed to receive a reply from API." + self.message = "Failed to receive a reply from model serving API." diff --git a/src/instructlab/eval/mt_bench_common.py b/src/instructlab/eval/mt_bench_common.py index 89dc58a..ee23546 100644 --- a/src/instructlab/eval/mt_bench_common.py +++ b/src/instructlab/eval/mt_bench_common.py @@ -252,7 +252,15 @@ def play_a_match_single( def _is_fatal_openai_error(e: openai.OpenAIError) -> bool: - return isinstance(e, openai.APIConnectionError) + return isinstance( + e, + ( + openai.APIConnectionError, + openai.AuthenticationError, + openai.PermissionDeniedError, + openai.NotFoundError, + ), + ) # TODO: copied from instructlab (cli) utils module; consolidate somewhere? @@ -302,6 +310,13 @@ def chat_completion_openai( ) output = response.choices[0].message.content break + except ( + # retry won't fix these errors + openai.BadRequestError, # 400 + openai.UnprocessableEntityError, # 422 + ) as e: + logger.debug(e) + return API_ERROR_OUTPUT # immediately soft fail except ( # retry may help with these errors openai.APIConnectionError, @@ -312,6 +327,8 @@ def chat_completion_openai( openai.AuthenticationError, # 401 openai.PermissionDeniedError, # 403 openai.NotFoundError, # 404 + # General catch-all + openai.OpenAIError, ) as e: if not _is_fatal_openai_error(e): output = API_ERROR_OUTPUT # disable hard fail (never raise!) @@ -321,18 +338,11 @@ def chat_completion_openai( break logger.debug(e) time.sleep(API_RETRY_SLEEP) - except ( - # retry won't fix these errors - openai.BadRequestError, # 400 - openai.UnprocessableEntityError, # 422 - ) as e: - logger.debug(e) - return API_ERROR_OUTPUT # immediately soft fail if output is None: # not a single attempt was non-fatal; this is indicative of # basic connectivity or server issue -> hard fail - raise exceptions.OpenAIError + raise exceptions.ModelServingAPIError return output