Address review feedback regarding chat template and inference request handling

Refactor tokenizer model names to be more descriptive

NeonDaniel committed Dec 23, 2024
1 parent 82d8e1b · commit 370e5f3

Showing 2 changed files with 15 additions and 8 deletions.
neon_data_models/models/api/llm.py (2 changes: 1 addition & 1 deletion)

@@ -165,7 +165,7 @@ def to_completion_kwargs(self, mq2role: dict = None) -> dict:
         history = self.messages[-2*self.max_history:]
         for msg in history:
             msg["role"] = mq2role.get(msg["role"]) or msg["role"]
-        if self.persona.system_prompt:
+        if self.persona.system_prompt is not None:
             history.insert(0, {"role": "system",
                                "content": self.persona.system_prompt})
         history.append({"role": "user", "content": self.query})
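
For context on this change: with `is not None`, a persona whose `system_prompt` is an empty string still contributes a system message; only an unset (`None`) prompt is skipped. A minimal standalone sketch of that distinction (the `Persona` dataclass below is a hypothetical stand-in, not the project's real model class):

from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Persona:
    system_prompt: Optional[str] = None


def build_history(persona: Persona) -> List[Dict[str, str]]:
    history = []
    # New behavior: an empty-string prompt is still inserted as a system
    # message; only an unset (None) prompt is skipped.
    if persona.system_prompt is not None:
        history.insert(0, {"role": "system",
                           "content": persona.system_prompt})
    return history


assert build_history(Persona(system_prompt="")) == [
    {"role": "system", "content": ""}]
assert build_history(Persona(system_prompt=None)) == []
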
neon_data_models/models/api/mq/brainforge.py (21 changes: 14 additions & 7 deletions)

@@ -24,7 +24,7 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from typing import List, Optional, Any, Dict, Literal
+from typing import List, Optional, Any, Dict, Literal, Union
 from pydantic import Field
 
 from neon_data_models.models.base.contexts import MQContext
@@ -85,15 +85,22 @@ class LLMGetCompletionResponse(MQContext):
         description="OpenAI ChatCompletion model")
 
 
-class LLMGetTokenizerChatTemplate(LLMGetModels):
+class LLMGetTokenizerChatTemplatedString(LLMGetModels):
     model: str = Field(description="Model to request (<name>@<revision>)")
     messages: List[Dict[Literal["role", "content"], str]] = Field(
         description="List of dict messages in OpenAI format")
-    tokenize: bool = Field(False)
-
-
-class LLMGetTokenizerChatTemplateResponse(MQContext):
-    prompt: str = Field(description="Prompt generated by the tokenizer")
+    add_generation_prompt: bool = Field(
+        description="If true, assistant start tokens will be appended to the "
+                    "formatted output.")
+    tokenize: bool = Field(
+        False,
+        description="If true, a list of token strings is returned, "
+                    "else a single string")
+
+
+class LLMGetTokenizerChatTemplatedStringResponse(MQContext):
+    prompt: Union[List[str], str] = Field(
+        description="Prompt generated by the tokenizer")
 
 
 class LLMGetInferenceResponse(LLMResponse, MQContext):
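
As a usage sketch, a request for the renamed chat-template models might look like the following. Only the fields visible in this diff are shown; any MQ routing context inherited from LLMGetModels/MQContext is omitted, and the model name is a hypothetical <name>@<revision> placeholder:

# Hypothetical request payload for LLMGetTokenizerChatTemplatedString.
request = {
    "model": "example-llm@main",  # hypothetical <name>@<revision>
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    "add_generation_prompt": True,  # append assistant start tokens
    "tokenize": False,  # False -> the response prompt is a single string
}

# The matching LLMGetTokenizerChatTemplatedStringResponse carries
# prompt: Union[List[str], str]:
#   tokenize=False -> prompt is one templated string
#   tokenize=True  -> prompt is a list of token strings
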
