From d8d162bbfac4b31f47a1f04a29313bc456ba25c3 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Mon, 19 Aug 2024 03:30:08 +0000
Subject: [PATCH 1/5] Add custom route prefix option

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ecd3bdee..ea4af2e3 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -131,6 +131,11 @@ def main(argv=None):
     parser.add_argument(
         "--max_batch_size", default=None, type=int, help="The max batch size for dynamic batching."
     )
+    parser.add_argument(
+        "--openai_route_prefix",
+        action="store_true",
+        help="Whether to use default '/' route prefix or deploy at new route prefix.",
+    )

     # Print help if no arguments were provided
     if len(sys.argv) == 1:
@@ -158,7 +163,10 @@ def main(argv=None):
     host = "127.0.0.1" if args.serve_local_only else "0.0.0.0"
     print("Service is running with deployments:" + str(deployments))
     print("Service is running models:" + str(model_list))
-    openai_serve_run(deployments, model_list, host, "/", args.port, args.max_ongoing_requests)
+    if args.openai_route_prefix:
+        openai_serve_run(deployments, model_list, host, "/" + args.openai_route_prefix, args.port, args.max_ongoing_requests)
+    else:
+        openai_serve_run(deployments, model_list, host, "/", args.port, args.max_ongoing_requests)

     msg = "Service is deployed successfully."
     if args.keep_serve_terminal:

From cf81ec4b51d1e7bb682c4edba2e7f40986e7fb99 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Mon, 19 Aug 2024 09:27:52 +0000
Subject: [PATCH 2/5] Fix the --openai_route_prefix option with proper
 argparse properties

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ea4af2e3..873caa33 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -133,7 +133,8 @@ def main(argv=None):
     )
     parser.add_argument(
         "--openai_route_prefix",
-        action="store_true",
+        default=None,
+        type=str,
         help="Whether to use default '/' route prefix or deploy at new route prefix.",
     )

From d3bd0527ee46c60c788875a0b9d2610c7feac481 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Fri, 23 Aug 2024 07:55:20 +0000
Subject: [PATCH 3/5] Let users enter their own route prefix with the
 --openai_route_prefix option

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index 7e9cca30..6f5e7da7 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -154,9 +154,9 @@ def main(argv=None):
     )
     parser.add_argument(
         "--openai_route_prefix",
-        default=None,
+        default="/",
         type=str,
-        help="Whether to use default '/' route prefix or deploy at new route prefix.",
+        help="The openai_route_prefix must start with a forward slash ('/')",
     )

     # Print help if no arguments were provided
@@ -186,24 +186,15 @@ def main(argv=None):
     print("Service is running with deployments:" + str(deployments))
     print("Service is running models:" + str(model_list))
-    if args.openai_route_prefix:
-        openai_serve_run(
-            deployments,
-            model_list,
-            host,
-            "/" + args.openai_route_prefix,
-            args.port,
-            args.max_ongoing_requests,
-            args.max_num_seqs,)
-    else:
-        openai_serve_run(
-            deployments,
-            model_list,
-            host,
-            "/",
-            args.port,
-            args.max_ongoing_requests,
-            args.max_num_seqs,
-        )
+    openai_serve_run(
+        deployments,
+        model_list,
+        host,
+        args.openai_route_prefix,
+        args.port,
+        args.max_ongoing_requests,
+        args.max_num_seqs,
+    )

     msg = "Service is deployed successfully."
     if args.keep_serve_terminal:

From 7d01cd98556040698835e300b4628fd2c90a89b1 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Fri, 23 Aug 2024 11:23:31 +0000
Subject: [PATCH 4/5] Add application name argument for the OpenAI server

Signed-off-by: cheehook
---
 llm_on_ray/inference/api_server_openai.py | 4 ++--
 llm_on_ray/inference/serve.py             | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py
index dcc1ee85..2d9ae7be 100644
--- a/llm_on_ray/inference/api_server_openai.py
+++ b/llm_on_ray/inference/api_server_openai.py
@@ -72,14 +72,14 @@ def router_application(deployments, model_list, max_ongoing_requests, max_num_se


 def openai_serve_run(
-    deployments, model_list, host, route_prefix, port, max_ongoing_requests, max_num_seqs
+    deployments, model_list, host, route_prefix, application_name, port, max_ongoing_requests, max_num_seqs
 ):
     router_app = router_application(deployments, model_list, max_ongoing_requests, max_num_seqs)
     serve.start(http_options={"host": host, "port": port})

     serve.run(
         router_app,
-        name="router",
+        name=application_name,
         route_prefix=route_prefix,
     ).options(
         stream=True,
diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index 6f5e7da7..a800444f 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -158,6 +158,12 @@ def main(argv=None):
         type=str,
         help="The openai_route_prefix must start with a forward slash ('/')",
     )
+    parser.add_argument(
+        "--openai_application_name",
+        default="router",
+        type=str,
+        help="If not specified, the application name will be 'router'.",
+    )

     # Print help if no arguments were provided
     if len(sys.argv) == 1:
@@ -191,6 +197,7 @@ def main(argv=None):
         model_list,
         host,
         args.openai_route_prefix,
+        args.openai_application_name,
         args.port,
         args.max_ongoing_requests,
         args.max_num_seqs,

From caae834f76b61cdc23c9c8e2c35430b2d3f1c4af Mon Sep 17 00:00:00 2001
From: cheehook
Date: Wed, 28 Aug 2024 03:29:45 +0000
Subject: [PATCH 5/5] Fix linting

Signed-off-by: cheehook
---
 llm_on_ray/inference/api_server_openai.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py
index 2d9ae7be..c5be3f8f 100644
--- a/llm_on_ray/inference/api_server_openai.py
+++ b/llm_on_ray/inference/api_server_openai.py
@@ -72,7 +72,14 @@ def router_application(deployments, model_list, max_ongoing_requests, max_num_se


 def openai_serve_run(
-    deployments, model_list, host, route_prefix, application_name, port, max_ongoing_requests, max_num_seqs
+    deployments,
+    model_list,
+    host,
+    route_prefix,
+    application_name,
+    port,
+    max_ongoing_requests,
+    max_num_seqs,
 ):
     router_app = router_application(deployments, model_list, max_ongoing_requests, max_num_seqs)
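
Taken together, the series makes both the HTTP route prefix and the Ray Serve
application name configurable instead of hardcoding "/" and "router". Below is
a minimal sketch of the resulting call, assuming the final openai_serve_run
signature from PATCH 5/5; deployments and model_list are built elsewhere in
serve.py, and the host, prefix, name, port, and limit values are illustrative
assumptions, not project defaults:

    # Sketch only -- mirrors what serve.py's main() does after this series.
    # Roughly equivalent CLI (plus whatever model-config arguments serve.py
    # expects):
    #   python llm_on_ray/inference/serve.py \
    #       --openai_route_prefix /llm --openai_application_name my_router
    openai_serve_run(
        deployments,      # Ray Serve deployments built from the model configs
        model_list,       # models exposed through the OpenAI-compatible routes
        "0.0.0.0",        # host
        "/llm",           # route_prefix; must start with a forward slash
        "my_router",      # application_name; previously hardcoded to "router"
        8000,             # port (assumed value)
        None,             # max_ongoing_requests
        None,             # max_num_seqs
    )

With a non-default prefix, an OpenAI-compatible client would fold the prefix
into its base URL (for example http://<host>:8000/llm/...), assuming Ray Serve
mounts the router's routes beneath the prefix, which is what
serve.run(..., route_prefix=route_prefix) does.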