From aad953423aef3930334bc9b264bec3b533779862 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Mon, 3 Jun 2024 15:20:03 +0000
Subject: [PATCH 1/8] add internal server error log print

---
 .../api_server_simple/query_single.py         | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 62bb4dc45..7e5edfbe8 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -75,8 +75,35 @@
     json=sample_input,
     stream=args.streaming_response,
 )
+try:
+    outputs.raise_for_status()
+except requests.exceptions.HTTPError as err:
+    if "Internal Server Error" in str(err):
+        import os
+
+        folder_path = "/tmp/ray/session_latest/logs/serve"
+        latest_file = None
+        latest_time = 0.0
+
+        for file_name in os.listdir(folder_path):
+            if file_name.startswith("replica") and file_name.endswith(".log"):
+                file_path = os.path.join(folder_path, file_name)
+                file_time = os.path.getmtime(file_path)
+                if file_time > latest_time:
+                    latest_time = file_time
+                    latest_file = file_path
+        if latest_file:
+            print("latest file:", latest_file)
+            with open(latest_file, "r") as file:
+                lines = file.readlines()
+                if lines:
+                    print("Latest Internal Server Error logs:", lines)
+                else:
+                    print("Internal Server Error logs: Empty")
+    else:
+        raise err
+
 
-outputs.raise_for_status()
 if args.streaming_response:
    for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
        print(output, end="", flush=True)
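The latest-log scan this patch adds can be written more compactly by keying max() on modification time. A minimal standalone sketch of the same lookup, assuming only the log directory and replica*.log naming convention used in the patch above (this is an editor's sketch, not code from the repo):

    import os

    folder_path = "/tmp/ray/session_latest/logs/serve"
    replica_logs = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.startswith("replica") and name.endswith(".log")
    ]
    # default=None avoids a ValueError when no replica logs exist yet
    latest_file = max(replica_logs, key=os.path.getmtime, default=None)
    if latest_file:
        print("latest file:", latest_file)
        with open(latest_file, "r") as file:
            lines = file.readlines()
            print("Latest Internal Server Error logs:", lines if lines else "Empty")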
From 972b9a52336e5ebfce9748452a565ae7c24f91e1 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 13:18:30 +0000
Subject: [PATCH 2/8] fix

---
 .../inference/api_server_simple/query_single.py |  7 +++++--
 llm_on_ray/inference/api_server_simple.py       | 17 +++++++++++++++--
 llm_on_ray/inference/serve.py                   |  5 ++++-
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 7e5edfbe8..9bbd0b07c 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -55,7 +55,7 @@
 )
 args = parser.parse_args()
 
-prompt = "Once upon a time,"
+prompt = "Once upon a time,ejfgaf help me !!"
 config: Dict[str, Union[int, float]] = {}
 if args.max_new_tokens:
     config["max_new_tokens"] = int(args.max_new_tokens)
@@ -76,9 +76,12 @@
     stream=args.streaming_response,
 )
 try:
+    print(1)
     outputs.raise_for_status()
+    print(outputs)
+    print(1)
 except requests.exceptions.HTTPError as err:
-    if "Internal Server Error" in str(err):
+    if "Client" in str(err):
         import os
 
         folder_path = "/tmp/ray/session_latest/logs/serve"
diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index f2cf0a1e7..78a4221c6 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -16,6 +16,15 @@
 import ray
 from ray import serve
 
+import logging
+
+logger = logging.getLogger(__name__)
+from ray.experimental.state.api import get_log, list_logs, list_nodes, list_workers
+
+
+def custom_error_handler(request, exc):
+    logger.error("sss")
+    return {"error:": "Internal Server Error"}
 
 
 def serve_run(deployments, model_list):
@@ -23,11 +32,14 @@
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
 
-    serve.start(http_options={"host": infer_conf.host, "port": infer_conf.port})
+    serve.start(
+        http_options={"host": infer_conf.host, "port": infer_conf.port, "log_level": "DEBUG"}
+    )
     serve.run(
         deployment,
         name=infer_conf.name,
         route_prefix=infer_conf.route_prefix,
+        # error_handler= custom_error_handler
     )
     deployment_name = infer_conf.name
     if infer_conf.host == "0.0.0.0":
@@ -40,5 +52,6 @@
         host_ip = infer_conf.host
     url = f"http://{host_ip}:{infer_conf.port}{infer_conf.route_prefix}"
     print(f"Deployment '{deployment_name}' is ready at `{url}`.")
-
+    for node1 in list_nodes():
+        list_logs(node_id=node1)
     return deployments
diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index a84717664..ca6f0b76c 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -15,6 +15,7 @@
 #
 import ray
 import sys
+import logging
 from pydantic_yaml import parse_yaml_raw_as
 from llm_on_ray.inference.utils import get_deployment_actor_options
 from llm_on_ray.inference.api_server_simple import serve_run
@@ -130,11 +131,13 @@
 
     args = parser.parse_args(argv)
 
-    ray.init(address="auto")
+    ray.init(address="auto", logging_level=logging.DEBUG)
     deployments, model_list = get_deployed_models(args)
     if args.simple:
         # provide simple model endpoint
         # models can be served to customed URLs according to configuration files.
+        # from api_server_simple import serve_run
+
         serve_run(deployments, model_list)
     else:
         # provide OpenAI compatible api to run LLM models
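Patch 2 above is exploratory debugging: it raises Serve and Ray logging to DEBUG and probes the experimental state API. One caveat worth noting about the `list_logs(node_id=node1)` call: `list_logs` expects a node id, while `list_nodes()` returns node entries (dicts or NodeState objects, depending on the Ray release). A hedged sketch of the intended call, assuming Ray 2.x's `ray.experimental.state.api` as imported in the patch:

    from ray.experimental.state.api import list_logs, list_nodes

    for node in list_nodes():
        # list_logs() wants the node id string, not the entry returned by
        # list_nodes(); handle both dict and NodeState shapes.
        node_id = node["node_id"] if isinstance(node, dict) else node.node_id
        print(list_logs(node_id=node_id))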
From 7446f69d837e555304398bd4a2b2a55d4d3378fe Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 16:30:18 +0000
Subject: [PATCH 3/8] fix

---
 .../api_server_simple/query_single.py    | 35 +++------------
 llm_on_ray/inference/serve.py            |  5 +--
 tests/inference/test_example_simple.py   | 45 ++++++++++++++++---
 3 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 9bbd0b07c..9c1c87899 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -55,7 +55,7 @@
 )
 args = parser.parse_args()
 
-prompt = "Once upon a time,ejfgaf help me !!"
+prompt = "Once upon a time,"
 config: Dict[str, Union[int, float]] = {}
 if args.max_new_tokens:
     config["max_new_tokens"] = int(args.max_new_tokens)
@@ -75,36 +75,13 @@
     json=sample_input,
     stream=args.streaming_response,
 )
-try:
-    print(1)
-    outputs.raise_for_status()
-    print(outputs)
-    print(1)
-except requests.exceptions.HTTPError as err:
-    if "Client" in str(err):
-        import os
 
-        folder_path = "/tmp/ray/session_latest/logs/serve"
-        latest_file = None
-        latest_time = 0.0
+outputs.raise_for_status()
 
-        for file_name in os.listdir(folder_path):
-            if file_name.startswith("replica") and file_name.endswith(".log"):
-                file_path = os.path.join(folder_path, file_name)
-                file_time = os.path.getmtime(file_path)
-                if file_time > latest_time:
-                    latest_time = file_time
-                    latest_file = file_path
-        if latest_file:
-            print("latest file:", latest_file)
-            with open(latest_file, "r") as file:
-                lines = file.readlines()
-                if lines:
-                    print("Latest Internal Server Error logs:", lines)
-                else:
-                    print("Internal Server Error logs: Empty")
-    else:
-        raise err
+from requests.exceptions import HTTPError
+
+http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
+raise HTTPError(http_error_msg)
 
 
 if args.streaming_response:
diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ca6f0b76c..a84717664 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -15,7 +15,6 @@
 #
 import ray
 import sys
-import logging
 from pydantic_yaml import parse_yaml_raw_as
 from llm_on_ray.inference.utils import get_deployment_actor_options
 from llm_on_ray.inference.api_server_simple import serve_run
@@ -131,13 +130,11 @@
 
     args = parser.parse_args(argv)
 
-    ray.init(address="auto", logging_level=logging.DEBUG)
+    ray.init(address="auto")
     deployments, model_list = get_deployed_models(args)
     if args.simple:
         # provide simple model endpoint
         # models can be served to customed URLs according to configuration files.
-        # from api_server_simple import serve_run
-
         serve_run(deployments, model_list)
     else:
         # provide OpenAI compatible api to run LLM models
diff --git a/tests/inference/test_example_simple.py b/tests/inference/test_example_simple.py
index a0842f481..14e2657b3 100644
--- a/tests/inference/test_example_simple.py
+++ b/tests/inference/test_example_simple.py
@@ -53,16 +53,47 @@ def script_with_args(
     if top_k is not None:
         cmd_single.extend(["--top_k", str(top_k)])
 
-    result_query_single = subprocess.run(cmd_single, capture_output=True, text=True)
+    try:
+        result_query_single = subprocess.run(cmd_single, capture_output=True, text=True, check=True)
 
-    # Print the output of subprocess.run for checking if output is expected
-    print(result_query_single)
+        # Print the output of subprocess.run for checking if output is expected
+        print("\n" + "Model in simple output message: " + "\n", result_query_single.stdout)
 
-    # Ensure there are no errors in the OpenAI API query script execution
-    assert "Error" not in result_query_single.stderr
+        assert isinstance(result_query_single.stdout, str), print(
+            "\n" + "Simple output is not a string" + "\n"
+        )
 
-    # Returncode should be 0 when there is no exception
-    assert result_query_single.returncode == 0
+        assert len(result_query_single.stdout) > 0, print("\n" + "Simple output length is 0" + "\n")
+
+    except subprocess.CalledProcessError as e:
+        if "Internal Server Error" in e.stderr:
+            print(e.stderr)
+            # Find the latest Internal Server Error log file
+            folder_path = "/tmp/ray/session_latest/logs/serve"
+            latest_file = None
+            latest_time = 0.0
+
+            for file_name in os.listdir(folder_path):
+                if file_name.startswith("replica") and file_name.endswith(".log"):
+                    file_path = os.path.join(folder_path, file_name)
+                    file_time = os.path.getmtime(file_path)
+                    if file_time > latest_time:
+                        latest_time = file_time
+                        latest_file = file_path
+            if latest_file:
+                print("latest file:", latest_file)
+                with open(latest_file, "r") as file:
+                    lines = file.readlines()
+                    if lines:
+                        print("Latest Internal Server Error logs:", lines)
+                    else:
+                        print("Internal Server Error logs: Empty")
+            assert False, print("Internal Server Error")
+        else:
+            # Returncode should be 0 when there are no errors in the exception
+            assert e.returncode == 0, print(
+                "\n" + "Simple query error stderr message: " + "\n", e.stderr
+            )
 
 
 executed_models = []
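A side note on the assertions this patch adds to the test: `assert cond, print(...)` uses the return value of `print()` — always `None` — as the assertion message, so the text only appears as a print side effect when the assertion fails, and the `AssertionError` itself carries no message. The behavior in isolation:

    # print() as an assert message: the text is printed as a side effect,
    # but the AssertionError itself carries None.
    try:
        assert False, print("side-effect message")
    except AssertionError as exc:
        print("AssertionError carries:", exc)  # -> AssertionError carries: None

A plain string message (for example `assert e.returncode == 0, e.stderr`) keeps the same information attached to the raised error.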
From 92954ec2713c6cc687b54a24d526a97c592ccf49 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 16:33:43 +0000
Subject: [PATCH 4/8] add test

---
 llm_on_ray/inference/api_server_simple.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index 78a4221c6..f2cf0a1e7 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -16,15 +16,6 @@
 import ray
 from ray import serve
 
-import logging
-
-logger = logging.getLogger(__name__)
-from ray.experimental.state.api import get_log, list_logs, list_nodes, list_workers
-
-
-def custom_error_handler(request, exc):
-    logger.error("sss")
-    return {"error:": "Internal Server Error"}
 
 
 def serve_run(deployments, model_list):
@@ -32,14 +23,11 @@
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
 
-    serve.start(
-        http_options={"host": infer_conf.host, "port": infer_conf.port, "log_level": "DEBUG"}
-    )
+    serve.start(http_options={"host": infer_conf.host, "port": infer_conf.port})
     serve.run(
         deployment,
         name=infer_conf.name,
         route_prefix=infer_conf.route_prefix,
-        # error_handler= custom_error_handler
     )
     deployment_name = infer_conf.name
     if infer_conf.host == "0.0.0.0":
@@ -52,6 +40,5 @@
         host_ip = infer_conf.host
     url = f"http://{host_ip}:{infer_conf.port}{infer_conf.route_prefix}"
     print(f"Deployment '{deployment_name}' is ready at `{url}`.")
-    for node1 in list_nodes():
-        list_logs(node_id=node1)
+
     return deployments
From 70da080a1d927487f55ef4654d30cdc09f66331f Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:22:54 +0000
Subject: [PATCH 5/8] test ds

---
 tests/inference/test_example_simple.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/inference/test_example_simple.py b/tests/inference/test_example_simple.py
index 14e2657b3..37208875f 100644
--- a/tests/inference/test_example_simple.py
+++ b/tests/inference/test_example_simple.py
@@ -71,7 +71,7 @@ def script_with_args(
             # Find the latest Internal Server Error log file
             folder_path = "/tmp/ray/session_latest/logs/serve"
             latest_file = None
-            latest_time = 0.0
+            latest_time = 0.00
 
             for file_name in os.listdir(folder_path):
                 if file_name.startswith("replica") and file_name.endswith(".log"):
From 2af1c34b05f80d3ebda2d12877b72bc9a4474106 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:25:16 +0000
Subject: [PATCH 6/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 9c1c87899..6246d8178 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,10 +78,10 @@
 
 outputs.raise_for_status()
 
-from requests.exceptions import HTTPError
+# from requests.exceptions import HTTPError
 
-http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
-raise HTTPError(http_error_msg)
+# http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
+# raise HTTPError(http_error_msg)
 
 
 if args.streaming_response:
From 532aa7f9ecf5cf0cd52aa3a5fa516b2a266f151f Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:27:08 +0000
Subject: [PATCH 7/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 6246d8178..a77225cb7 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,11 +78,6 @@
 
 outputs.raise_for_status()
 
-# from requests.exceptions import HTTPError
-
-# http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
-# raise HTTPError(http_error_msg)
-
 
 if args.streaming_response:
     for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
From 7d28661699b1e64784ce33d38bc883b26256d5dd Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:27:43 +0000
Subject: [PATCH 8/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index a77225cb7..29140faac 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,7 +78,6 @@
 
 outputs.raise_for_status()
 
-
 if args.streaming_response:
     for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
         print(output, end="", flush=True)
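Patches 3 through 8 force the 500 path by raising an `HTTPError` from the example script and then strip it back out again. The same failure message can be reproduced without editing the script at all, which keeps the example clean while still exercising the test's error branch. A minimal sketch using only the `requests` library (the endpoint URL is a placeholder, not taken from the repo's configs):

    import requests

    # Build a bare Response, mark it as a 500, and let raise_for_status()
    # generate the "500 Server Error: Internal Server Error for url: ..."
    # message that the test matches on.
    resp = requests.models.Response()
    resp.status_code = 500
    resp.reason = "Internal Server Error"
    resp.url = "http://127.0.0.1:8000/custom_model"  # placeholder endpoint

    try:
        resp.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)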