From d8d162bbfac4b31f47a1f04a29313bc456ba25c3 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Mon, 19 Aug 2024 03:30:08 +0000
Subject: [PATCH 1/3] Add custom route prefix option

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ecd3bdee..ea4af2e3 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -131,6 +131,11 @@ def main(argv=None):
     parser.add_argument(
         "--max_batch_size", default=None, type=int, help="The max batch size for dynamic batching."
     )
+    parser.add_argument(
+        "--openai_route_prefix",
+        action="store_true",
+        help="Whether to use default '/' route prefix or deploy at new route prefix.",
+    )
 
     # Print help if no arguments were provided
     if len(sys.argv) == 1:
@@ -158,7 +163,10 @@ def main(argv=None):
     host = "127.0.0.1" if args.serve_local_only else "0.0.0.0"
     print("Service is running with deployments:" + str(deployments))
     print("Service is running models:" + str(model_list))
-    openai_serve_run(deployments, model_list, host, "/", args.port, args.max_ongoing_requests)
+    if args.openai_route_prefix:
+        openai_serve_run(deployments, model_list, host, "/" + args.openai_route_prefix, args.port, args.max_ongoing_requests)
+    else:
+        openai_serve_run(deployments, model_list, host, "/", args.port, args.max_ongoing_requests)
 
     msg = "Service is deployed successfully."
     if args.keep_serve_terminal:

From cf81ec4b51d1e7bb682c4edba2e7f40986e7fb99 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Mon, 19 Aug 2024 09:27:52 +0000
Subject: [PATCH 2/3] fix the option with proper properties

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index ea4af2e3..873caa33 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -133,7 +133,8 @@ def main(argv=None):
     )
     parser.add_argument(
         "--openai_route_prefix",
-        action="store_true",
+        default=None,
+        type=str,
         help="Whether to use default '/' route prefix or deploy at new route prefix.",
     )
 

From d3bd0527ee46c60c788875a0b9d2610c7feac481 Mon Sep 17 00:00:00 2001
From: cheehook
Date: Fri, 23 Aug 2024 07:55:20 +0000
Subject: [PATCH 3/3] user will enter their own route prefix with --openai_route_prefix option

Signed-off-by: cheehook
---
 llm_on_ray/inference/serve.py | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/llm_on_ray/inference/serve.py b/llm_on_ray/inference/serve.py
index 7e9cca30..6f5e7da7 100644
--- a/llm_on_ray/inference/serve.py
+++ b/llm_on_ray/inference/serve.py
@@ -154,9 +154,9 @@ def main(argv=None):
     )
     parser.add_argument(
         "--openai_route_prefix",
-        default=None,
+        default="/",
         type=str,
-        help="Whether to use default '/' route prefix or deploy at new route prefix.",
+        help="The openai_route_prefix must start with a forward slash ('/')",
     )
 
     # Print help if no arguments were provided
@@ -186,24 +186,15 @@ def main(argv=None):
     print("Service is running with deployments:" + str(deployments))
     print("Service is running models:" + str(model_list))
-    if args.openai_route_prefix:
-        openai_serve_run(
-            deployments,
-            model_list,
-            host,
-            "/" + args.openai_route_prefix,
-            args.port,
-            args.max_ongoing_requests,
-            args.max_num_seqs,)
-    else:
-        openai_serve_run(
-            deployments,
-            model_list,
-            host,
-            "/",
-            args.port,
-            args.max_ongoing_requests,
-            args.max_num_seqs,
+    openai_serve_run(
+        deployments,
+        model_list,
+        host,
+        args.openai_route_prefix,
+        args.port,
+        args.max_ongoing_requests,
+        args.max_num_seqs,
+    )
 
     msg = "Service is deployed successfully."
     if args.keep_serve_terminal:
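
A possible follow-up, sketched below, is to enforce the rule stated in the final help text ("must start with a forward slash ('/')") at argument-parsing time rather than only documenting it. This is an illustrative sketch, not code from serve.py: the validator name route_prefix and the trailing-slash normalization are assumptions.

    import argparse

    def route_prefix(value: str) -> str:
        """Reject prefixes that do not start with '/'; trim a trailing slash."""
        if not value.startswith("/"):
            raise argparse.ArgumentTypeError(
                f"--openai_route_prefix must start with '/', got {value!r}"
            )
        # Keep the bare root prefix "/"; otherwise normalize "/llm/" to "/llm".
        return value if value == "/" else value.rstrip("/")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--openai_route_prefix",
        default="/",
        type=route_prefix,
        help="The openai_route_prefix must start with a forward slash ('/')",
    )

    # Example: a custom prefix as main() would receive it after parsing.
    args = parser.parse_args(["--openai_route_prefix", "/llm/"])
    print(args.openai_route_prefix)  # -> /llm

With a type callback like this, openai_serve_run would always receive a well-formed prefix, while the default "/" behaviour introduced in PATCH 3 stays unchanged.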