diff --git a/phi/phi-3.5-mini/config.yaml b/phi/phi-3.5-mini/config.yaml
index 8daa8832..9beacb1f 100644
--- a/phi/phi-3.5-mini/config.yaml
+++ b/phi/phi-3.5-mini/config.yaml
@@ -1,7 +1,7 @@
 model_name: "Phi 3.5 Mini Instruct VLLM openai compatible"
 python_version: py311
 model_metadata:
-  example_model_input: {"prompt": "what is the meaning of life"}
+  example_model_input: {"messages": [{"role": "user", "content": "what is the meaning of life"}]}
   repo_id: microsoft/Phi-3.5-mini-instruct
   openai_compatible: true
   vllm_config:
diff --git a/phi/phi-3.5-mini/model/model.py b/phi/phi-3.5-mini/model/model.py
index aeed6c3f..c6b5c1e1 100644
--- a/phi/phi-3.5-mini/model/model.py
+++ b/phi/phi-3.5-mini/model/model.py
@@ -142,7 +142,7 @@ async def predict(self, model_input):
         if "messages" not in model_input and "prompt" not in model_input:
            raise ValueError("Prompt or messages must be provided")
 
-        stream = model_input.pop("stream", False)
+        stream = model_input.get("stream", False)
         if self.openai_compatible:
             # if the key metrics: true is present, let's return the vLLM /metrics endpoint
             if model_input.get("metrics", False):
diff --git a/vllm/model/model.py b/vllm/model/model.py
index aeed6c3f..c6b5c1e1 100644
--- a/vllm/model/model.py
+++ b/vllm/model/model.py
@@ -142,7 +142,7 @@ async def predict(self, model_input):
         if "messages" not in model_input and "prompt" not in model_input:
            raise ValueError("Prompt or messages must be provided")
 
-        stream = model_input.pop("stream", False)
+        stream = model_input.get("stream", False)
         if self.openai_compatible:
             # if the key metrics: true is present, let's return the vLLM /metrics endpoint
             if model_input.get("metrics", False):
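
Note: the snippet below is a minimal standalone sketch (not part of the patch) of why `.get()` replaces `.pop()` here: `.pop()` mutates `model_input`, stripping the "stream" key before the payload is forwarded to the OpenAI-compatible engine, while `.get()` leaves the payload intact. The example dict mirrors the new chat-style `example_model_input` from config.yaml; the values are illustrative only.

# Sketch: dict.pop vs dict.get on the request payload.
model_input = {
    "messages": [{"role": "user", "content": "what is the meaning of life"}],
    "stream": True,
}

stream = model_input.pop("stream", False)  # reads AND removes the key
assert stream is True
assert "stream" not in model_input         # downstream consumers no longer see it

model_input["stream"] = True               # restore for the second half of the demo
stream = model_input.get("stream", False)  # reads without mutating the dict
assert stream is True
assert model_input["stream"] is True       # key survives, so the forwarded payload keeps it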