feat: migrate to assembled_response #105

Merged · 7 commits · Jan 23, 2025 · changes from all commits
51 changes: 11 additions & 40 deletions aidial_analytics_realtime/app.py
@@ -21,10 +21,10 @@
InfluxWriterAsync,
create_influx_writer,
)
from aidial_analytics_realtime.log_request.message import get_assembled_response
from aidial_analytics_realtime.rates import RatesCalculator
from aidial_analytics_realtime.time import parse_time
from aidial_analytics_realtime.topic_model import TopicModel
from aidial_analytics_realtime.universal_api_utils import merge
from aidial_analytics_realtime.utils.concurrency import cpu_task_executor
from aidial_analytics_realtime.utils.logging import add_logger_prefix
from aidial_analytics_realtime.utils.logging import app_logger as logger
@@ -91,6 +91,7 @@ async def on_chat_completion_message(
timestamp: datetime,
request: dict,
response: dict,
response_body: dict | None,
influx_writer: InfluxWriterAsync,
topic_model: TopicModel,
rates_calculator: RatesCalculator,
@@ -102,41 +103,12 @@
if response["status"] != "200":
return

response_body = None
request_body = None
model: str | None = None

if (request_body_str := request.get("body")) is not None:

request_body = json.loads(request_body_str)
stream = request_body.get("stream", False)
model = request_body.get("model", deployment)

if stream:
body = response["body"]
chunks = body.split("\n\ndata: ")

chunks = [chunk.strip() for chunk in chunks]

chunks[0] = chunks[0][chunks[0].find("data: ") + 6 :]
if chunks[-1] == "[DONE]":
chunks.pop(len(chunks) - 1)

response_body = json.loads(chunks[-1])
for chunk in chunks[0 : len(chunks) - 1]:
chunk = json.loads(chunk)

response_body["choices"] = merge(
response_body["choices"], chunk["choices"]
)

for i in range(len(response_body["choices"])):
response_body["choices"][i]["message"] = response_body[
"choices"
][i]["delta"]
del response_body["choices"][i]["delta"]
else:
response_body = json.loads(response["body"])
model = request_body.get("model") or deployment

await on_message(
influx_writer,
@@ -226,17 +198,15 @@ async def on_log_message(
chat_id = message["chat"]["id"]
user_hash = message["user"]["id"]
user_title = message["user"]["title"]
upstream_url = (
response["upstream_uri"] if "upstream_uri" in response else ""
)

timestamp = parse_time(request["time"])

token_usage = message.get("token_usage", None)
trace = message.get("trace", None)
parent_deployment = message.get("parent_deployment", None)
execution_path = message.get("execution_path", None)
deployment = message.get("deployment", "")
upstream_url = response.get("upstream_uri") or ""
token_usage = message.get("token_usage")
trace = message.get("trace")
parent_deployment = message.get("parent_deployment")
execution_path = message.get("execution_path")
deployment = message.get("deployment") or ""
response_body = get_assembled_response(message)

if re.search(RATE_PATTERN, uri):
await on_rate_message(
@@ -262,6 +232,7 @@
timestamp,
request,
response,
response_body,
influx_writer,
topic_model,
rates_calculator,
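The block removed above re-assembled streamed SSE chunks into a single chat completion inline, delegating per-chunk merging to `universal_api_utils.merge` (deleted below). A minimal, self-contained sketch of that legacy path, with a simplified stand-in for `merge` whose exact semantics are not reproduced here:

```python
import json


def assemble_from_sse(body: str) -> dict:
    """Sketch of the legacy re-assembly this PR removes from app.py.

    Stitches "data: ..." SSE chunks back into one chat.completion dict,
    concatenating content deltas per choice in stream order (a simplified
    stand-in for the deleted merge() helper).
    """
    chunks = [c.strip() for c in body.split("\n\ndata: ")]
    chunks[0] = chunks[0].removeprefix("data: ")
    if chunks[-1] == "[DONE]":
        chunks.pop()

    parsed = [json.loads(c) for c in chunks]
    final = parsed[-1]  # the last chunk carries finish_reason and usage

    for choice in final["choices"]:
        idx = choice["index"]
        content = "".join(
            c["choices"][idx]["delta"].get("content", "")
            for c in parsed
            if idx < len(c["choices"])
        )
        # Same delta -> message rewrite the removed code performed
        # (role hardcoded here for brevity).
        choice["message"] = {"role": "assistant", "content": content}
        choice.pop("delta", None)
    return final
```

With ai-dial-core supplying `assembled_response` directly, this whole path collapses into the single `get_assembled_response` call introduced in the new module below.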
17 changes: 17 additions & 0 deletions aidial_analytics_realtime/log_request/message.py
@@ -0,0 +1,17 @@
import json


def get_assembled_response(message: dict) -> dict | None:
if (assembled_response_str := message.get("assembled_response")) is None:
return None

assembled_response = json.loads(assembled_response_str)

# NOTE: this transformation becomes redundant in ai-dial-core>=0.22.1
# due to the fix https://github.com/epam/ai-dial-core/pull/648
for choice in assembled_response.get("choices") or []:
if "delta" in choice:
choice["message"] = choice["delta"]
del choice["delta"]

return assembled_response
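For illustration, a usage sketch of the new helper; the payload mirrors the `assembled_response` fixtures added to `tests/test_app.py` below:

```python
import json

from aidial_analytics_realtime.log_request.message import get_assembled_response

message = {
    "assembled_response": json.dumps(
        {
            "id": "chatcmpl-1",
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "delta": {"role": "assistant", "content": "pong"},
                    "finish_reason": "stop",
                }
            ],
        }
    )
}

assembled = get_assembled_response(message)
assert assembled is not None
# Cores older than 0.22.1 emit delta; the helper renames it to message.
assert assembled["choices"][0]["message"]["content"] == "pong"
assert "delta" not in assembled["choices"][0]

# Log messages without the field yield None.
assert get_assembled_response({}) is None
```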
39 changes: 0 additions & 39 deletions aidial_analytics_realtime/universal_api_utils.py

This file was deleted.

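With the inline stream re-assembly gone, `merge` (the only symbol app.py imported from this module) appears to lose its last call site in this diff, hence the file-level deletion.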
138 changes: 138 additions & 0 deletions tests/test_app.py
@@ -43,6 +43,29 @@ def test_chat_completion_plain_text():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -77,6 +100,29 @@ def test_chat_completion_plain_text():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-2",
"object": "chat.completion",
"created": 1700828102,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"po"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"ng"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -212,6 +258,29 @@ def test_chat_completion_list_content():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -287,6 +356,29 @@ def test_chat_completion_none_content():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "5",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"5"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -556,6 +648,29 @@ def test_data_request_with_new_format():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -600,6 +715,29 @@ def test_data_request_with_new_format():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-2",
"object": "chat.completion",
"created": 1700828102,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',