From 104c44f0bf1c9ec8664a19e521b8370e83f1d4d2 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda
Date: Wed, 6 Dec 2023 22:49:42 +0000
Subject: [PATCH] Expose engine timeout to config

---
 serve/mlc_serve/engine/base.py           | 1 +
 serve/mlc_serve/engine/staging_engine.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/serve/mlc_serve/engine/base.py b/serve/mlc_serve/engine/base.py
index 2eddf04878..ae602e8e7c 100644
--- a/serve/mlc_serve/engine/base.py
+++ b/serve/mlc_serve/engine/base.py
@@ -27,6 +27,7 @@ class MLCServeEngineConfig:
     max_num_batched_tokens: int = -1
     min_decode_steps: int = 32
     max_decode_steps: int = 48
+    init_timeout: int = 120
 
     @classmethod
     def _from_json(config_cls, json_obj: Dict[Any, Any]):
diff --git a/serve/mlc_serve/engine/staging_engine.py b/serve/mlc_serve/engine/staging_engine.py
index 1578abbef8..0035aab997 100644
--- a/serve/mlc_serve/engine/staging_engine.py
+++ b/serve/mlc_serve/engine/staging_engine.py
@@ -51,6 +51,7 @@ def __init__(
         model_module_loader_kwargs: dict,
         # maybe find a better way to do this
         json_log_output: bool = False,
+        init_timeout: int = 120,
     ):
         self.next_generation_output = None
         self.requests_lock = Lock()
@@ -63,6 +64,7 @@ def __init__(
         self.command_queue = self.mp_context.Queue()
         self.result_queue = self.mp_context.Queue(maxsize=1)
         self.ready_event = self.mp_context.Event()
+        self.init_timeout = init_timeout
 
         self.worker_process = self.mp_context.Process(
             target=run_generation_loop_worker,
@@ -83,7 +85,7 @@ def start(self):
         LOG.info("StagingInferenceEngine.start")
         try:
             self.worker_process.start()
-            if not self.ready_event.wait(timeout=120):
+            if not self.ready_event.wait(timeout=self.init_timeout):
                 raise RuntimeError(
                     "StagingInferenceEngine worker is not ready before timeout."
                 )