[Infer] Add Simple Protocol for simple request and response #244
Merged

Commits (12, all by yutianchen666):
- 7c4a295 add simpleclass
- ec2b4b5 Merge branch 'intel:main' into sim_req
- 7c85521 add dynamic_batch
- 34407cb add server request
- 7545708 afix numpy
- aee3429 update to 2.3
- 31fadc0 fix np
- 8fa20a3 Merge branch 'intel:main' into sim_req
- 1af2406 fix
- d8c0119 fix
- 18b4a25 add pytest
- cdc3b8c fix name
Files changed:

llm_on_ray/inference/api_simple_backend/simple_protocol.py (91 additions, 0 deletions)
@@ -0,0 +1,91 @@
#
# Copyright 2023 The LLM-on-Ray Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Dict, Optional, Union, Iterator
import requests
from pydantic import BaseModel, validator


class SimpleRequest(BaseModel):
    text: str
    config: Dict[str, Union[int, float]] = {}
    stream: Optional[bool] = False

    @validator("text")
    def text_must_not_be_empty(cls, v):
        if not v.strip():
            raise ValueError("Empty prompt is not supported.")
        return v
@validator("config", pre=True) | ||
def check_config_type(cls, value): | ||
allowed_keys = ["max_new_tokens", "temperature", "top_p", "top_k"] | ||
allowed_set = set(allowed_keys) | ||
config_dict = value.keys() | ||
config_keys = [key for key in config_dict] | ||
config_set = set(config_keys) | ||
|
||
if not isinstance(value, dict): | ||
raise ValueError("Config must be a dictionary") | ||
|
||
if not all(isinstance(key, str) for key in value.keys()): | ||
raise ValueError("All keys in config must be strings") | ||
|
||
if not all(isinstance(val, (int, float)) for val in value.values()): | ||
raise ValueError("All values in config must be integers or floats") | ||
|
||
if not config_set.issubset(allowed_set): | ||
invalid_keys = config_set - allowed_set | ||
raise ValueError(f'Invalid config keys: {", ".join(invalid_keys)}') | ||
|
||
return value | ||
|
||
@validator("stream", pre=True) | ||
def check_stream_type(cls, value): | ||
if not isinstance(value, bool) and value is not None: | ||
raise ValueError("Stream must be a boolean or None") | ||
return value | ||
|
||
|
||
class SimpleModelResponse(BaseModel): | ||
headers: Dict[str, str] | ||
text: str | ||
content: bytes | ||
status_code: int | ||
url: str | ||
|
||
class Config: | ||
arbitrary_types_allowed = True | ||
|
||
response: Optional[requests.Response] = None | ||
|
||
@staticmethod | ||
def from_requests_response(response: requests.Response): | ||
return SimpleModelResponse( | ||
headers=dict(response.headers), | ||
text=response.text, | ||
content=response.content, | ||
status_code=response.status_code, | ||
url=response.url, | ||
response=response, | ||
) | ||
|
||
def iter_content( | ||
self, chunk_size: Optional[int] = 1, decode_unicode: bool = False | ||
) -> Iterator[Union[bytes, str]]: | ||
if self.response is not None: | ||
return self.response.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode) | ||
else: | ||
return iter([]) |
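To illustrate how the validators above behave, here is a minimal sketch (assuming pydantic v1, matching the validator API used in the file, and that llm_on_ray is installed so the module is importable; the prompt and config values are hypothetical examples):

    from pydantic import ValidationError
    from llm_on_ray.inference.api_simple_backend.simple_protocol import SimpleRequest

    # Valid: only max_new_tokens/temperature/top_p/top_k are accepted config keys.
    req = SimpleRequest(text="Once upon a time", config={"max_new_tokens": 128})
    print(req.dict())
    # {'text': 'Once upon a time', 'config': {'max_new_tokens': 128}, 'stream': False}

    # Invalid: an unknown config key is rejected by check_config_type.
    try:
        SimpleRequest(text="hi", config={"num_beams": 4})
    except ValidationError as e:
        print(e)  # reports: Invalid config keys: num_beams

Note that the ValueError raised inside a validator surfaces to callers as a pydantic ValidationError, which is itself a ValueError subclass.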
The accompanying pytest file (90 additions, 0 deletions):
@@ -0,0 +1,90 @@
#
# Copyright 2023 The LLM-on-Ray Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest
import requests
from basic_set import start_serve
from llm_on_ray.inference.api_simple_backend.simple_protocol import (
    SimpleRequest,
    SimpleModelResponse,
)

# Track which models have already had a serve instance started.
executed_models = []
# Parametrize the test function with different combinations of parameters.
# TODO: more models and combinations will be added and tested.
@pytest.mark.parametrize(
    "prompt,streaming_response,max_new_tokens,temperature,top_p,top_k",
    [
        (
            prompt,
            streaming_response,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        )
        for prompt in ["Once upon a time", ""]
        for streaming_response in [None, True, "error"]
        for max_new_tokens in [None, 128, "error"]
        for temperature in [None]
        for top_p in [None]
        for top_k in [None]
    ],
)
def test_script(prompt, streaming_response, max_new_tokens, temperature, top_p, top_k):
    global executed_models

    # Start serving this model only once across all parametrized cases.
    if "gpt2" not in executed_models:
        start_serve("gpt2", simple=True)
        executed_models.append("gpt2")

    config = {}
    if max_new_tokens:
        config["max_new_tokens"] = max_new_tokens
    if temperature:
        config["temperature"] = temperature
    if top_p:
        config["top_p"] = top_p
    if top_k:
        config["top_k"] = top_k

    # Invalid parameter combinations (empty prompt, "error" values) are
    # expected to fail validation here; pydantic's ValidationError is a
    # subclass of ValueError, so this except clause catches it.
    try:
        sample_input = SimpleRequest(text=prompt, config=config, stream=streaming_response)
    except ValueError as e:
        print(e)
        return

    outputs = requests.post(
        "http://localhost:8000/gpt2",
        proxies={"http": None, "https": None},  # type: ignore
        json=sample_input.dict(),
        stream=streaming_response,
    )
    outputs.raise_for_status()

    simple_response = SimpleModelResponse.from_requests_response(outputs)
    if streaming_response:
        for output in simple_response.iter_content(chunk_size=1, decode_unicode=True):
            print(output, end="", flush=True)
        print()
    else:
        print(simple_response.text, flush=True)
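Outside of pytest, the same round trip can be written as a small standalone client. A minimal sketch, assuming a serve deployment for gpt2 is already listening at http://localhost:8000/gpt2 as in the test above:

    import requests

    from llm_on_ray.inference.api_simple_backend.simple_protocol import (
        SimpleRequest,
        SimpleModelResponse,
    )

    # Build and validate the request body client-side before sending it.
    request = SimpleRequest(text="Once upon a time", config={"max_new_tokens": 128}, stream=True)

    # POST the validated payload; stream=True keeps the connection open so
    # chunks can be consumed incrementally.
    raw = requests.post("http://localhost:8000/gpt2", json=request.dict(), stream=True)
    raw.raise_for_status()

    # Wrap the requests.Response so callers only see the simple protocol types.
    response = SimpleModelResponse.from_requests_response(raw)
    for chunk in response.iter_content(chunk_size=1, decode_unicode=True):
        print(chunk, end="", flush=True)
    print()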
Review comment: filename: protocal => protocol