From aad953423aef3930334bc9b264bec3b533779862 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Mon, 3 Jun 2024 15:20:03 +0000
Subject: [PATCH 1/8] add internal server error log print

---
 .../api_server_simple/query_single.py         | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 62bb4dc45..7e5edfbe8 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -75,8 +75,35 @@
     json=sample_input,
     stream=args.streaming_response,
 )
+try:
+    outputs.raise_for_status()
+except requests.exceptions.HTTPError as err:
+    if "Internal Server Error" in str(err):
+        import os
+
+        folder_path = "/tmp/ray/session_latest/logs/serve"
+        latest_file = None
+        latest_time = 0.0
+
+        for file_name in os.listdir(folder_path):
+            if file_name.startswith("replica") and file_name.endswith(".log"):
+                file_path = os.path.join(folder_path, file_name)
+                file_time = os.path.getmtime(file_path)
+                if file_time > latest_time:
+                    latest_time = file_time
+                    latest_file = file_path
+        if latest_file:
+            print("latest file:", latest_file)
+            with open(latest_file, "r") as file:
+                lines = file.readlines()
+                if lines:
+                    print("Latest Internal Server Error logs:", lines)
+                else:
+                    print("Internal Server Error logs: Empty")
+    else:
+        raise err
+
 
-outputs.raise_for_status()
 if args.streaming_response:
    for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
        print(output, end="", flush=True)
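The latest-log scan this patch adds can be written more compactly by keying max() on modification time. A minimal standalone sketch of the same lookup, assuming only the log directory and replica*.log naming convention used in the patch above (this is an editor's sketch, not code from the repo):

    import os

    folder_path = "/tmp/ray/session_latest/logs/serve"
    replica_logs = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.startswith("replica") and name.endswith(".log")
    ]
    # default=None avoids a ValueError when no replica logs exist yet
    latest_file = max(replica_logs, key=os.path.getmtime, default=None)
    if latest_file:
        print("latest file:", latest_file)
        with open(latest_file, "r") as file:
            lines = file.readlines()
            print("Latest Internal Server Error logs:", lines if lines else "Empty")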
From 972b9a52336e5ebfce9748452a565ae7c24f91e1 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 13:18:30 +0000
Subject: [PATCH 2/8] fix

---
 .../inference/api_server_simple/query_single.py |  7 +++++--
 llm_on_ray/inference/api_server_simple.py       | 17 +++++++++++++++--
 llm_on_ray/inference/serve.py                   |  5 ++++-
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 7e5edfbe8..9bbd0b07c 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -55,7 +55,7 @@
 )
 args = parser.parse_args()
 
-prompt = "Once upon a time,"
+prompt = "Once upon a time,ejfgaf help me !!"
 config: Dict[str, Union[int, float]] = {}
 if args.max_new_tokens:
     config["max_new_tokens"] = int(args.max_new_tokens)
@@ -76,9 +76,12 @@
     stream=args.streaming_response,
 )
 try:
+    print(1)
     outputs.raise_for_status()
+    print(outputs)
+    print(1)
 except requests.exceptions.HTTPError as err:
-    if "Internal Server Error" in str(err):
+    if "Client" in str(err):
         import os
 
         folder_path = "/tmp/ray/session_latest/logs/serve"
diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index f2cf0a1e7..78a4221c6 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -16,6 +16,15 @@
 import ray
 from ray import serve
 
+import logging
+
+logger = logging.getLogger(__name__)
+from ray.experimental.state.api import get_log, list_logs, list_nodes, list_workers
+
+
+def custom_error_handler(request, exc):
+    logger.error("sss")
+    return {"error:": "Internal Server Error"}
 
 
 def serve_run(deployments, model_list):
@@ -23,11 +32,14 @@
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
 
-    serve.start(http_options={"host": infer_conf.host, "port": infer_conf.port})
+    serve.start(
+        http_options={"host": infer_conf.host, "port": infer_conf.port, "log_level": "DEBUG"}
+    )
     serve.run(
         deployment,
         name=infer_conf.name,
         route_prefix=infer_conf.route_prefix,
+        # error_handler= custom_error_handler
     )
     deployment_name = infer_conf.name
     if infer_conf.host == "0.0.0.0":
@@ -40,5 +52,6 @@
         host_ip = infer_conf.host
     url = f"http://{host_ip}:{infer_conf.port}{infer_conf.route_prefix}"
     print(f"Deployment '{deployment_name}' is ready at `{url}`.")
-
+    for node1 in list_nodes():
+        list_logs(node_id=node1)
     return deployments
diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index a84717664..ca6f0b76c 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -15,6 +15,7 @@
 #
 import ray
 import sys
+import logging
 from pydantic_yaml import parse_yaml_raw_as
 from llm_on_ray.inference.utils import get_deployment_actor_options
 from llm_on_ray.inference.api_server_simple import serve_run
@@ -130,11 +131,13 @@
 
     args = parser.parse_args(argv)
 
-    ray.init(address="auto")
+    ray.init(address="auto", logging_level=logging.DEBUG)
     deployments, model_list = get_deployed_models(args)
     if args.simple:
         # provide simple model endpoint
         # models can be served to customed URLs according to configuration files.
+        # from api_server_simple import serve_run
+
         serve_run(deployments, model_list)
     else:
         # provide OpenAI compatible api to run LLM models
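Patch 2 above is exploratory debugging: it raises Serve and Ray logging to DEBUG and probes the experimental state API. One caveat worth noting about the `list_logs(node_id=node1)` call: `list_logs` expects a node id, while `list_nodes()` returns node entries (dicts or NodeState objects, depending on the Ray release). A hedged sketch of the intended call, assuming Ray 2.x's `ray.experimental.state.api` as imported in the patch:

    from ray.experimental.state.api import list_logs, list_nodes

    for node in list_nodes():
        # list_logs() wants the node id string, not the entry returned by
        # list_nodes(); handle both dict and NodeState shapes.
        node_id = node["node_id"] if isinstance(node, dict) else node.node_id
        print(list_logs(node_id=node_id))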
From 7446f69d837e555304398bd4a2b2a55d4d3378fe Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 16:30:18 +0000
Subject: [PATCH 3/8] fix

---
 .../api_server_simple/query_single.py    | 35 +++------------
 llm_on_ray/inference/serve.py            |  5 +--
 tests/inference/test_example_simple.py   | 45 ++++++++++++++++---
 3 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 9bbd0b07c..9c1c87899 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -55,7 +55,7 @@
 )
 args = parser.parse_args()
 
-prompt = "Once upon a time,ejfgaf help me !!"
+prompt = "Once upon a time,"
 config: Dict[str, Union[int, float]] = {}
 if args.max_new_tokens:
     config["max_new_tokens"] = int(args.max_new_tokens)
@@ -75,36 +75,13 @@
     json=sample_input,
     stream=args.streaming_response,
 )
-try:
-    print(1)
-    outputs.raise_for_status()
-    print(outputs)
-    print(1)
-except requests.exceptions.HTTPError as err:
-    if "Client" in str(err):
-        import os
 
-        folder_path = "/tmp/ray/session_latest/logs/serve"
-        latest_file = None
-        latest_time = 0.0
+outputs.raise_for_status()
 
-        for file_name in os.listdir(folder_path):
-            if file_name.startswith("replica") and file_name.endswith(".log"):
-                file_path = os.path.join(folder_path, file_name)
-                file_time = os.path.getmtime(file_path)
-                if file_time > latest_time:
-                    latest_time = file_time
-                    latest_file = file_path
-        if latest_file:
-            print("latest file:", latest_file)
-            with open(latest_file, "r") as file:
-                lines = file.readlines()
-                if lines:
-                    print("Latest Internal Server Error logs:", lines)
-                else:
-                    print("Internal Server Error logs: Empty")
-    else:
-        raise err
+from requests.exceptions import HTTPError
+
+http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
+raise HTTPError(http_error_msg)
 
 
 if args.streaming_response:
diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ca6f0b76c..a84717664 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -15,7 +15,6 @@
 #
 import ray
 import sys
-import logging
 from pydantic_yaml import parse_yaml_raw_as
 from llm_on_ray.inference.utils import get_deployment_actor_options
 from llm_on_ray.inference.api_server_simple import serve_run
@@ -131,13 +130,11 @@
 
     args = parser.parse_args(argv)
 
-    ray.init(address="auto", logging_level=logging.DEBUG)
+    ray.init(address="auto")
     deployments, model_list = get_deployed_models(args)
     if args.simple:
         # provide simple model endpoint
         # models can be served to customed URLs according to configuration files.
-        # from api_server_simple import serve_run
-
         serve_run(deployments, model_list)
     else:
         # provide OpenAI compatible api to run LLM models
diff --git a/tests/inference/test_example_simple.py b/tests/inference/test_example_simple.py
index a0842f481..14e2657b3 100644
--- a/tests/inference/test_example_simple.py
+++ b/tests/inference/test_example_simple.py
@@ -53,16 +53,47 @@ def script_with_args(
     if top_k is not None:
         cmd_single.extend(["--top_k", str(top_k)])
 
-    result_query_single = subprocess.run(cmd_single, capture_output=True, text=True)
+    try:
+        result_query_single = subprocess.run(cmd_single, capture_output=True, text=True, check=True)
 
-    # Print the output of subprocess.run for checking if output is expected
-    print(result_query_single)
+        # Print the output of subprocess.run for checking if output is expected
+        print("\n" + "Model in simple output message: " + "\n", result_query_single.stdout)
 
-    # Ensure there are no errors in the OpenAI API query script execution
-    assert "Error" not in result_query_single.stderr
+        assert isinstance(result_query_single.stdout, str), print(
+            "\n" + "Simple output is not a string" + "\n"
+        )
 
-    # Returncode should be 0 when there is no exception
-    assert result_query_single.returncode == 0
+        assert len(result_query_single.stdout) > 0, print("\n" + "Simple output length is 0" + "\n")
+
+    except subprocess.CalledProcessError as e:
+        if "Internal Server Error" in e.stderr:
+            print(e.stderr)
+            # Find the latest Internal Server Error log file
+            folder_path = "/tmp/ray/session_latest/logs/serve"
+            latest_file = None
+            latest_time = 0.0
+
+            for file_name in os.listdir(folder_path):
+                if file_name.startswith("replica") and file_name.endswith(".log"):
+                    file_path = os.path.join(folder_path, file_name)
+                    file_time = os.path.getmtime(file_path)
+                    if file_time > latest_time:
+                        latest_time = file_time
+                        latest_file = file_path
+            if latest_file:
+                print("latest file:", latest_file)
+                with open(latest_file, "r") as file:
+                    lines = file.readlines()
+                    if lines:
+                        print("Latest Internal Server Error logs:", lines)
+                    else:
+                        print("Internal Server Error logs: Empty")
+            assert False, print("Internal Server Error")
+        else:
+            # Returncode should be 0 when there are no errors in the exception
+            assert e.returncode == 0, print(
+                "\n" + "Simple query error stderr message: " + "\n", e.stderr
+            )
 
 
 executed_models = []
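A side note on the assertions this patch adds to the test: `assert cond, print(...)` uses the return value of `print()` — always `None` — as the assertion message, so the text only appears as a print side effect when the assertion fails, and the `AssertionError` itself carries no message. The behavior in isolation:

    # print() as an assert message: the text is printed as a side effect,
    # but the AssertionError itself carries None.
    try:
        assert False, print("side-effect message")
    except AssertionError as exc:
        print("AssertionError carries:", exc)  # -> AssertionError carries: None

A plain string message (for example `assert e.returncode == 0, e.stderr`) keeps the same information attached to the raised error.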
From 92954ec2713c6cc687b54a24d526a97c592ccf49 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Fri, 14 Jun 2024 16:33:43 +0000
Subject: [PATCH 4/8] add test

---
 llm_on_ray/inference/api_server_simple.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index 78a4221c6..f2cf0a1e7 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -16,15 +16,6 @@
 import ray
 from ray import serve
 
-import logging
-
-logger = logging.getLogger(__name__)
-from ray.experimental.state.api import get_log, list_logs, list_nodes, list_workers
-
-
-def custom_error_handler(request, exc):
-    logger.error("sss")
-    return {"error:": "Internal Server Error"}
 
 
 def serve_run(deployments, model_list):
@@ -32,14 +23,11 @@
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
 
-    serve.start(
-        http_options={"host": infer_conf.host, "port": infer_conf.port, "log_level": "DEBUG"}
-    )
+    serve.start(http_options={"host": infer_conf.host, "port": infer_conf.port})
     serve.run(
         deployment,
         name=infer_conf.name,
         route_prefix=infer_conf.route_prefix,
-        # error_handler= custom_error_handler
     )
     deployment_name = infer_conf.name
     if infer_conf.host == "0.0.0.0":
@@ -52,6 +40,5 @@
         host_ip = infer_conf.host
     url = f"http://{host_ip}:{infer_conf.port}{infer_conf.route_prefix}"
     print(f"Deployment '{deployment_name}' is ready at `{url}`.")
-    for node1 in list_nodes():
-        list_logs(node_id=node1)
+
     return deployments
From 70da080a1d927487f55ef4654d30cdc09f66331f Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:22:54 +0000
Subject: [PATCH 5/8] test ds

---
 tests/inference/test_example_simple.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/inference/test_example_simple.py b/tests/inference/test_example_simple.py
index 14e2657b3..37208875f 100644
--- a/tests/inference/test_example_simple.py
+++ b/tests/inference/test_example_simple.py
@@ -71,7 +71,7 @@ def script_with_args(
             # Find the latest Internal Server Error log file
             folder_path = "/tmp/ray/session_latest/logs/serve"
             latest_file = None
-            latest_time = 0.0
+            latest_time = 0.00
 
             for file_name in os.listdir(folder_path):
                 if file_name.startswith("replica") and file_name.endswith(".log"):
From 2af1c34b05f80d3ebda2d12877b72bc9a4474106 Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:25:16 +0000
Subject: [PATCH 6/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 9c1c87899..6246d8178 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,10 +78,10 @@
 
 outputs.raise_for_status()
 
-from requests.exceptions import HTTPError
+# from requests.exceptions import HTTPError
 
-http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
-raise HTTPError(http_error_msg)
+# http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
+# raise HTTPError(http_error_msg)
 
 
 if args.streaming_response:
From 532aa7f9ecf5cf0cd52aa3a5fa516b2a266f151f Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:27:08 +0000
Subject: [PATCH 7/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index 6246d8178..a77225cb7 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,11 +78,6 @@
 
 outputs.raise_for_status()
 
-# from requests.exceptions import HTTPError
-
-# http_error_msg = f"{500} Server Error: Internal Server Error for url: {args.model_endpoint}"
-# raise HTTPError(http_error_msg)
-
 
 if args.streaming_response:
     for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
From 7d28661699b1e64784ce33d38bc883b26256d5dd Mon Sep 17 00:00:00 2001
From: yutianchen666
Date: Tue, 25 Jun 2024 09:27:43 +0000
Subject: [PATCH 8/8] test ds

---
 examples/inference/api_server_simple/query_single.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/inference/api_server_simple/query_single.py b/examples/inference/api_server_simple/query_single.py
index a77225cb7..29140faac 100644
--- a/examples/inference/api_server_simple/query_single.py
+++ b/examples/inference/api_server_simple/query_single.py
@@ -78,7 +78,6 @@
 
 outputs.raise_for_status()
 
-
 if args.streaming_response:
     for output in outputs.iter_content(chunk_size=None, decode_unicode=True):
         print(output, end="", flush=True)
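Patches 3 through 8 force the 500 path by raising an `HTTPError` from the example script and then strip it back out again. The same failure message can be reproduced without editing the script at all, which keeps the example clean while still exercising the test's error branch. A minimal sketch using only the `requests` library (the endpoint URL is a placeholder, not taken from the repo's configs):

    import requests

    # Build a bare Response, mark it as a 500, and let raise_for_status()
    # generate the "500 Server Error: Internal Server Error for url: ..."
    # message that the test matches on.
    resp = requests.models.Response()
    resp.status_code = 500
    resp.reason = "Internal Server Error"
    resp.url = "http://127.0.0.1:8000/custom_model"  # placeholder endpoint

    try:
        resp.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)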