diff --git a/serve/mlc_serve/engine/async_connector.py b/serve/mlc_serve/engine/async_connector.py index 794bc759bd..4819bb3667 100644 --- a/serve/mlc_serve/engine/async_connector.py +++ b/serve/mlc_serve/engine/async_connector.py @@ -87,6 +87,7 @@ async def generate(self, request: Request) -> AsyncIterator[RequestOutput]: LOG.info("AsyncEngineConnector.generate iterator cancelled.", request_id=request.request_id) await asyncio.shield(asyncio.to_thread(self.engine.cancel, request.request_id)) LOG.info("AsyncEngineConnector.generate request sucessfully cancelled.", request_id=request.request_id) + raise finally: LOG.info("AsyncEngineConnector.generate removing request from result queue.", request_id=request.request_id) self.result_queues.pop(request.request_id, None)