From ddb2cc97ab5d336bf201b83be60187f76db2026d Mon Sep 17 00:00:00 2001
From: ali asaria
Date: Fri, 9 Feb 2024 14:04:35 -0500
Subject: [PATCH] Update MLX integration to use new generate_step function
 signature (#3021)

---
 docs/mlx_integration.md      | 6 +++---
 fastchat/serve/mlx_worker.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/mlx_integration.md b/docs/mlx_integration.md
index b500207a6..21642d948 100644
--- a/docs/mlx_integration.md
+++ b/docs/mlx_integration.md
@@ -13,11 +13,11 @@ Note that for Apple Silicon Macs with less memory, smaller models (or quantized
 1. Install MLX.
 
    ```
-   pip install mlx-lm
+   pip install "mlx-lm>=0.0.6"
    ```
 
-2. When you launch a model worker, replace the normal worker (`fastchat.serve.model_worker`) with the MLX worker (`fastchat.serve.mlx_worker`).
+2. When you launch a model worker, replace the normal worker (`fastchat.serve.model_worker`) with the MLX worker (`fastchat.serve.mlx_worker`). Remember to launch a model worker after you have launched the controller ([instructions](../README.md))
 
    ```
-   python3 -m fastchat.serve.mlx_worker --model-path microsoft/phi-2
+   python3 -m fastchat.serve.mlx_worker --model-path TinyLlama/TinyLlama-1.1B-Chat-v1.0
    ```

diff --git a/fastchat/serve/mlx_worker.py b/fastchat/serve/mlx_worker.py
index 795529104..a7e85f848 100644
--- a/fastchat/serve/mlx_worker.py
+++ b/fastchat/serve/mlx_worker.py
@@ -124,7 +124,7 @@ async def generate_stream(self, params):
         )
 
         for i in range(max_new_tokens):
-            token = await run_in_threadpool(next, iterator)
+            (token, _) = await run_in_threadpool(next, iterator)
             if token == self.mlx_tokenizer.eos_token_id:
                 finish_reason = "stop"
                 break
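
For context, the worker change tracks an upstream API change in mlx-lm: `generate_step` now yields `(token, prob)` tuples instead of bare token ids, which is why the worker unpacks two values and why the docs pin `mlx-lm>=0.0.6`. A minimal sketch of the difference, assuming the mlx-lm generator API of that era; the model path and prompt are illustrative:

```python
# Sketch of the generate_step change this patch adapts to.
# Assumes mlx-lm >= 0.0.6; model path and prompt are illustrative.
import mlx.core as mx
from mlx_lm import load
from mlx_lm.utils import generate_step

model, tokenizer = load("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
prompt = mx.array(tokenizer.encode("Hello"))

# generate_step returns a generator that produces one token per step.
iterator = generate_step(prompt, model)

# Before the change, the generator yielded a bare token:
#   token = next(iterator)
# After the change, each step yields a (token, prob) tuple, so the caller
# unpacks it and discards the probability when only the token is needed:
token, _ = next(iterator)

# The worker compares the yielded token against the tokenizer's EOS id
# to decide when to stop streaming, mirroring the patched loop above.
if token == tokenizer.eos_token_id:
    print("model emitted end-of-sequence")
```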