Skip to content

Commit

Permalink
Fix bug when serving llama2 on the UI (intel#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
KepingYan authored Nov 16, 2023
1 parent 21d53aa commit 62831b1
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion inference/chat_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def prepare_prompt(self, messages: list):
role, content = msg["role"], msg["content"]
if role == "user":
if self.human_id != "":
prompt += self.human_id.format(content)
prompt += self.human_id.format(msg=content)
else:
prompt += f"{content}\n"
elif role == "assistant":
Expand Down
5 changes: 3 additions & 2 deletions inference/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
}
}

llama2 = {
llama2_7b = {
"model_id_or_path": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
"port": "8000",
Expand All @@ -143,7 +143,7 @@
"bloom": bloom,
"opt": opt,
"mpt": mpt,
"llama2": llama2
"llama2_7b": llama2_7b
}

env_model = "MODEL_TO_SERVE"
Expand All @@ -155,3 +155,4 @@

base_models["gpt2"] = gpt2
base_models["gpt-j-6B"] = gpt_j_6B
base_models["llama2-7b"] = llama2_7b
2 changes: 1 addition & 1 deletion inference/start_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from config import all_models, base_models
import time
import os
from chat_process import ChatModelGptJ
from chat_process import ChatModelGptJ, ChatModelLLama
import torch
from run_model_serve import PredictDeployment
from ray import serve
Expand Down

0 comments on commit 62831b1

Please sign in to comment.