Skip to content

Commit

Permalink
fix(api): bump max tokens to maximum for Synthia-7B to prevent chopping (#699)
Browse files Browse the repository at this point in the history

* bump max prompt and completion tokens to 4096, per Synthia-7B, to prevent generated responses from being cut off
  • Loading branch information
gphorvath authored Jul 2, 2024
1 parent 7dab8bd commit ef7e098
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
14 changes: 11 additions & 3 deletions src/leapfrogai_api/backend/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
from openai.types.beta.vector_store import ExpiresAfter
from pydantic import BaseModel, Field

##########
# DEFAULTS
##########

# Shared token-limit defaults. They are used as the default values of the
# `max_tokens` fields on the completion / chat-completion request models and
# of `max_completion_tokens` / `max_prompt_tokens` on run-creation requests.
# NOTE(review): DEFAULT_MAX_COMPLETION_TOKENS is also used as the default for
# `Usage.completion_tokens`, which looks like a reported count rather than a
# limit — confirm that is intended.

# Default maximum number of completion (generated) tokens.
DEFAULT_MAX_COMPLETION_TOKENS = 4096
# Default maximum number of prompt tokens.
DEFAULT_MAX_PROMPT_TOKENS = 4096


##########
# GENERIC
Expand All @@ -34,7 +42,7 @@ class Usage(BaseModel):
"""Usage object."""

prompt_tokens: int
completion_tokens: int | None = None
completion_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS
total_tokens: int


Expand Down Expand Up @@ -70,7 +78,7 @@ class CompletionRequest(BaseModel):
model: str
prompt: str | list[int]
stream: bool | None = False
max_tokens: int | None = 16
max_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS
temperature: float | None = 1.0


Expand Down Expand Up @@ -131,7 +139,7 @@ class ChatCompletionRequest(BaseModel):
top_p: float | None = 1
stream: bool | None = False
stop: str | None = None
max_tokens: int | None = 128
max_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS


class ChatChoice(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
ChatCompletionResponse,
ChatCompletionRequest,
ChatChoice,
DEFAULT_MAX_COMPLETION_TOKENS,
DEFAULT_MAX_PROMPT_TOKENS,
)
from leapfrogai_api.data.crud_assistant import CRUDAssistant, FilterAssistant
from leapfrogai_api.data.crud_message import CRUDMessage
Expand All @@ -71,8 +73,12 @@
class RunCreateParamsRequestBase(BaseModel):
assistant_id: str = Field(default="", examples=["123ab"])
instructions: str = Field(default="", examples=["You are a helpful AI assistant."])
max_completion_tokens: int | None = Field(default=1024, examples=[4096])
max_prompt_tokens: int | None = Field(default=1024, examples=[32768])
max_completion_tokens: int | None = Field(
default=DEFAULT_MAX_COMPLETION_TOKENS, examples=[DEFAULT_MAX_COMPLETION_TOKENS]
)
max_prompt_tokens: int | None = Field(
default=DEFAULT_MAX_PROMPT_TOKENS, examples=[DEFAULT_MAX_PROMPT_TOKENS]
)
metadata: dict | None = Field(default={}, examples=[{}])
model: str | None = Field(default=None, examples=["llama-cpp-python"])
response_format: AssistantResponseFormatOption | None = Field(
Expand All @@ -96,14 +102,16 @@ def __init__(self, **data):
# TODO: Temporary fix to ensure max_completion_tokens and max_prompt_tokens are set
if self.max_completion_tokens is None or self.max_completion_tokens < 1:
logging.warning(
"max_completion_tokens is not set or is less than 1, setting to 1024"
"max_completion_tokens is not set or is less than 1, setting to %s",
DEFAULT_MAX_COMPLETION_TOKENS,
)
self.max_completion_tokens = 1024
self.max_completion_tokens = DEFAULT_MAX_COMPLETION_TOKENS
if self.max_prompt_tokens is None or self.max_prompt_tokens < 1:
logging.warning(
"max_prompt_tokens is not set or is less than 1, setting to 1024"
"max_prompt_tokens is not set or is less than 1, setting to %s",
DEFAULT_MAX_PROMPT_TOKENS,
)
self.max_prompt_tokens = 1024
self.max_prompt_tokens = DEFAULT_MAX_PROMPT_TOKENS

@staticmethod
def get_initial_messages_base(run: Run) -> list[str]:
Expand Down

0 comments on commit ef7e098

Please sign in to comment.