feat(model): Support llama.cpp server deploy (#2263)
fangyinc authored Jan 2, 2025
1 parent 576da34 commit 0b2af2e
Showing 14 changed files with 823 additions and 44 deletions.
171 changes: 163 additions & 8 deletions dbgpt/core/schema/api.py
@@ -65,6 +65,14 @@ class APIChatCompletionRequest(BaseModel):
presence_penalty: Optional[float] = Field(0.0, description="Presence penalty")


class UsageInfo(BaseModel):
"""Usage info entity."""

prompt_tokens: int = Field(0, description="Prompt tokens")
total_tokens: int = Field(0, description="Total tokens")
completion_tokens: Optional[int] = Field(0, description="Completion tokens")


class DeltaMessage(BaseModel):
"""Delta message entity for chat completion response."""

@@ -95,6 +103,7 @@ class ChatCompletionStreamResponse(BaseModel):
choices: List[ChatCompletionResponseStreamChoice] = Field(
..., description="Chat completion response choices"
)
usage: UsageInfo = Field(..., description="Usage info")


class ChatMessage(BaseModel):
@@ -104,14 +113,6 @@ class ChatMessage(BaseModel):
content: str = Field(..., description="Content of the message")


class UsageInfo(BaseModel):
"""Usage info entity."""

prompt_tokens: int = Field(0, description="Prompt tokens")
total_tokens: int = Field(0, description="Total tokens")
completion_tokens: Optional[int] = Field(0, description="Completion tokens")


class ChatCompletionResponseChoice(BaseModel):
"""Chat completion response choice entity."""

Expand Down Expand Up @@ -256,3 +257,157 @@ class ErrorCode(IntEnum):
GRADIO_STREAM_UNKNOWN_ERROR = 50004
CONTROLLER_NO_WORKER = 50005
CONTROLLER_WORKER_TIMEOUT = 50006


class CompletionRequest(BaseModel):
"""Completion request entity."""

model: str = Field(..., description="Model name")
prompt: Union[str, List[Any]] = Field(
...,
description="Provide the prompt for this completion as a string or as an "
"array of strings or numbers representing tokens",
)
suffix: Optional[str] = Field(
None,
description="Suffix to append to the completion. If provided, the model will "
"stop generating upon reaching this suffix",
)
temperature: Optional[float] = Field(
0.8,
description="Adjust the randomness of the generated text. Default: `0.8`",
)
n: Optional[int] = Field(
1,
description="Number of completions to generate. Default: `1`",
)
max_tokens: Optional[int] = Field(
16,
description="The maximum number of tokens that can be generated in the "
"completion. Default: `16`",
)
stop: Optional[Union[str, List[str]]] = Field(
None,
description="Up to 4 sequences where the API will stop generating further "
"tokens. The returned text will not contain the stop sequence.",
)
stream: Optional[bool] = Field(
False,
description="Whether to stream back partial completions. Default: `False`",
)
top_p: Optional[float] = Field(
1.0,
description="Limit the next token selection to a subset of tokens with a "
"cumulative probability above a threshold P. Default: `1.0`",
)
top_k: Optional[int] = Field(
-1,
description="Limit the next token selection to the K most probable tokens. "
"Default: `-1`",
)
logprobs: Optional[int] = Field(
None,
description="Modify the likelihood of specified tokens appearing in the "
"completion.",
)
echo: Optional[bool] = Field(
False, description="Echo back the prompt in addition to the completion"
)
presence_penalty: Optional[float] = Field(
0.0,
description="Number between -2.0 and 2.0. Positive values penalize new tokens "
"based on whether they appear in the text so far, increasing the model's "
"likelihood to talk about new topics.",
)
frequency_penalty: Optional[float] = Field(
0.0,
description="Number between -2.0 and 2.0. Positive values penalize new tokens "
"based on their existing frequency in the text so far, decreasing the model's "
"likelihood to repeat the same line verbatim.",
)
user: Optional[str] = Field(
None,
description="A unique identifier representing your end-user, which can help "
"OpenAI to monitor and detect abuse.",
)
use_beam_search: Optional[bool] = False
best_of: Optional[int] = Field(
1,
description='Generates best_of completions server-side and returns the "best" '
"(the one with the highest log probability per token). Results cannot be "
"streamed. When used with n, best_of controls the number of candidate "
"completions and n specifies how many to return – best_of must be greater than "
"n.",
)


class LogProbs(BaseModel):
"""Logprobs entity."""

text_offset: List[int] = Field(default_factory=list, description="Text offset")
token_logprobs: List[Optional[float]] = Field(
default_factory=list, description="Token logprobs"
)
tokens: List[str] = Field(default_factory=list, description="Tokens")
top_logprobs: List[Optional[Dict[str, float]]] = Field(
default_factory=list, description="Top logprobs"
)


class CompletionResponseChoice(BaseModel):
"""Completion response choice entity."""

index: int = Field(..., description="Choice index")
text: str = Field(..., description="Text")
logprobs: Optional[LogProbs] = Field(None, description="Logprobs")
finish_reason: Optional[Literal["stop", "length"]] = Field(
None, description="The reason the model stopped generating tokens."
)


class CompletionResponse(BaseModel):
"""Completion response entity."""

id: str = Field(default_factory=lambda: f"cmpl-{str(uuid.uuid1())}")
object: str = Field(
"text_completion",
description="The object type, which is always 'text_completion'",
)
created: int = Field(
default_factory=lambda: int(time.time()), description="Created time"
)
model: str = Field(..., description="Model name")
choices: List[CompletionResponseChoice] = Field(
...,
description="The list of completion choices the model generated for the input "
"prompt.",
)
usage: UsageInfo = Field(..., description="Usage info")


class CompletionResponseStreamChoice(BaseModel):
"""Completion response choice entity."""

index: int = Field(..., description="Choice index")
text: str = Field(..., description="Text")
logprobs: Optional[LogProbs] = Field(None, description="Logprobs")
finish_reason: Optional[Literal["stop", "length"]] = Field(
None, description="The reason the model stopped generating tokens."
)


class CompletionStreamResponse(BaseModel):
"""Completion stream response entity."""

id: str = Field(
default_factory=lambda: f"cmpl-{str(uuid.uuid1())}", description="Stream ID"
)
object: str = Field("text_completion", description="Object type")
created: int = Field(
default_factory=lambda: int(time.time()), description="Created time"
)
model: str = Field(..., description="Model name")
choices: List[CompletionResponseStreamChoice] = Field(
..., description="Completion response choices"
)
usage: UsageInfo = Field(..., description="Usage info")
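
For reference, a minimal usage sketch (not part of this commit) of the new completion schemas, assuming they are importable from dbgpt.core.schema.api as defined above; the model name, prompt, and token counts are placeholders for illustration only:

from dbgpt.core.schema.api import (
    CompletionRequest,
    CompletionResponse,
    CompletionResponseChoice,
    UsageInfo,
)

# Build a non-streaming completion request; "my-llama-model" is a placeholder name.
request = CompletionRequest(
    model="my-llama-model",
    prompt="The quick brown fox",
    max_tokens=32,
    stream=False,
)

# A response a worker could return; `id` and `created` come from the default factories.
response = CompletionResponse(
    model=request.model,
    choices=[
        CompletionResponseChoice(
            index=0,
            text=" jumps over the lazy dog.",
            finish_reason="stop",
        )
    ],
    usage=UsageInfo(prompt_tokens=4, completion_tokens=6, total_tokens=10),
)
print(response.choices[0].text, response.usage.total_tokens)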
16 changes: 16 additions & 0 deletions dbgpt/model/adapter/base.py
@@ -145,6 +145,14 @@ def support_async(self) -> bool:
"""Whether the loaded model supports asynchronous calls"""
return False

def support_generate_function(self) -> bool:
"""Whether support generate function, if it is False, we will use
generate_stream function.
Sometimes, we need to use generate function to get the result of the model.
"""
return False

def get_generate_stream_function(self, model, model_path: str):
"""Get the generate stream function of the model"""
raise NotImplementedError
@@ -153,6 +161,14 @@ def get_async_generate_stream_function(self, model, model_path: str):
"""Get the asynchronous generate stream function of the model"""
raise NotImplementedError

def get_generate_function(self, model, model_path: str):
"""Get the generate function of the model"""
raise NotImplementedError

def get_async_generate_function(self, model, model_path: str):
"""Get the asynchronous generate function of the model"""
raise NotImplementedError

def get_default_conv_template(
self, model_name: str, model_path: str
) -> Optional[ConversationAdapter]:
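
A minimal sketch (not part of this commit) of how an adapter subclass might opt in to the new non-streaming hooks; the base class name LLMModelAdapter and the create_completion* methods on the loaded model object are assumptions for illustration, not APIs confirmed by this diff:

from dbgpt.model.adapter.base import LLMModelAdapter  # assumed base class name


class ExampleServerAdapter(LLMModelAdapter):
    """Hypothetical adapter supporting both streaming and non-streaming generation."""

    def support_generate_function(self) -> bool:
        # Opt in: callers may use get_generate_function for full, one-shot results.
        return True

    def get_generate_function(self, model, model_path: str):
        def generate(params):
            # Hypothetical call: run one full completion and return the final output.
            return model.create_completion(params)

        return generate

    def get_generate_stream_function(self, model, model_path: str):
        def generate_stream(params):
            # Hypothetical call: yield partial outputs as they are produced.
            yield from model.create_completion_stream(params)

        return generate_stream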