diff --git a/phi/phi-3.5-mini/config.yaml b/phi/phi-3.5-mini/config.yaml
index 8daa8832..9beacb1f 100644
--- a/phi/phi-3.5-mini/config.yaml
+++ b/phi/phi-3.5-mini/config.yaml
@@ -1,7 +1,7 @@
 model_name: "Phi 3.5 Mini Instruct VLLM openai compatible"
 python_version: py311
 model_metadata:
-  example_model_input: {"prompt": "what is the meaning of life"}
+  example_model_input: {"messages": [{"role": "user", "content": "what is the meaning of life"}]}
   repo_id: microsoft/Phi-3.5-mini-instruct
   openai_compatible: true
   vllm_config:
diff --git a/phi/phi-3.5-mini/model/model.py b/phi/phi-3.5-mini/model/model.py
index aeed6c3f..c6b5c1e1 100644
--- a/phi/phi-3.5-mini/model/model.py
+++ b/phi/phi-3.5-mini/model/model.py
@@ -142,7 +142,7 @@ async def predict(self, model_input):
         if "messages" not in model_input and "prompt" not in model_input:
            raise ValueError("Prompt or messages must be provided")
 
-        stream = model_input.pop("stream", False)
+        stream = model_input.get("stream", False)
         if self.openai_compatible:
             # if the key metrics: true is present, let's return the vLLM /metrics endpoint
             if model_input.get("metrics", False):
diff --git a/vllm/model/model.py b/vllm/model/model.py
index aeed6c3f..c6b5c1e1 100644
--- a/vllm/model/model.py
+++ b/vllm/model/model.py
@@ -142,7 +142,7 @@ async def predict(self, model_input):
         if "messages" not in model_input and "prompt" not in model_input:
            raise ValueError("Prompt or messages must be provided")
 
-        stream = model_input.pop("stream", False)
+        stream = model_input.get("stream", False)
         if self.openai_compatible:
             # if the key metrics: true is present, let's return the vLLM /metrics endpoint
             if model_input.get("metrics", False):
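
Note: the snippet below is a minimal standalone sketch (not part of the patch) of why `.get()` replaces `.pop()` here: `.pop()` mutates `model_input`, stripping the "stream" key before the payload is forwarded to the OpenAI-compatible engine, while `.get()` leaves the payload intact. The example dict mirrors the new chat-style `example_model_input` from config.yaml; the values are illustrative only.

# Sketch: dict.pop vs dict.get on the request payload.
model_input = {
    "messages": [{"role": "user", "content": "what is the meaning of life"}],
    "stream": True,
}

stream = model_input.pop("stream", False)  # reads AND removes the key
assert stream is True
assert "stream" not in model_input         # downstream consumers no longer see it

model_input["stream"] = True               # restore for the second half of the demo
stream = model_input.get("stream", False)  # reads without mutating the dict
assert stream is True
assert model_input["stream"] is True       # key survives, so the forwarded payload keeps it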