feat: migrate to assembled_response (#105)
adubovik authored Jan 23, 2025
1 parent 8b07bc0 commit 0574ebe
Showing 4 changed files with 166 additions and 79 deletions.
51 changes: 11 additions & 40 deletions aidial_analytics_realtime/app.py
@@ -21,10 +21,10 @@
InfluxWriterAsync,
create_influx_writer,
)
from aidial_analytics_realtime.log_request.message import get_assembled_response
from aidial_analytics_realtime.rates import RatesCalculator
from aidial_analytics_realtime.time import parse_time
from aidial_analytics_realtime.topic_model import TopicModel
from aidial_analytics_realtime.universal_api_utils import merge
from aidial_analytics_realtime.utils.concurrency import cpu_task_executor
from aidial_analytics_realtime.utils.logging import add_logger_prefix
from aidial_analytics_realtime.utils.logging import app_logger as logger
@@ -91,6 +91,7 @@ async def on_chat_completion_message(
timestamp: datetime,
request: dict,
response: dict,
response_body: dict | None,
influx_writer: InfluxWriterAsync,
topic_model: TopicModel,
rates_calculator: RatesCalculator,
@@ -102,41 +103,12 @@
if response["status"] != "200":
return

response_body = None
request_body = None
model: str | None = None

if (request_body_str := request.get("body")) is not None:

request_body = json.loads(request_body_str)
stream = request_body.get("stream", False)
model = request_body.get("model", deployment)

if stream:
body = response["body"]
chunks = body.split("\n\ndata: ")

chunks = [chunk.strip() for chunk in chunks]

chunks[0] = chunks[0][chunks[0].find("data: ") + 6 :]
if chunks[-1] == "[DONE]":
chunks.pop(len(chunks) - 1)

response_body = json.loads(chunks[-1])
for chunk in chunks[0 : len(chunks) - 1]:
chunk = json.loads(chunk)

response_body["choices"] = merge(
response_body["choices"], chunk["choices"]
)

for i in range(len(response_body["choices"])):
response_body["choices"][i]["message"] = response_body[
"choices"
][i]["delta"]
del response_body["choices"][i]["delta"]
else:
response_body = json.loads(response["body"])
model = request_body.get("model") or deployment

await on_message(
influx_writer,
@@ -226,17 +198,15 @@ async def on_log_message(
chat_id = message["chat"]["id"]
user_hash = message["user"]["id"]
user_title = message["user"]["title"]
upstream_url = (
response["upstream_uri"] if "upstream_uri" in response else ""
)

timestamp = parse_time(request["time"])

token_usage = message.get("token_usage", None)
trace = message.get("trace", None)
parent_deployment = message.get("parent_deployment", None)
execution_path = message.get("execution_path", None)
deployment = message.get("deployment", "")
upstream_url = response.get("upstream_uri") or ""
token_usage = message.get("token_usage")
trace = message.get("trace")
parent_deployment = message.get("parent_deployment")
execution_path = message.get("execution_path")
deployment = message.get("deployment") or ""
response_body = get_assembled_response(message)

if re.search(RATE_PATTERN, uri):
await on_rate_message(
Expand All @@ -262,6 +232,7 @@ async def on_log_message(
timestamp,
request,
response,
response_body,
influx_writer,
topic_model,
rates_calculator,
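For reference, the streaming branch deleted above reassembled a chat completion from the raw SSE response body, relying on the merge helper from the now-deleted universal_api_utils module. A cleaned-up, self-contained sketch of that logic follows; both function names here are illustrative, and the merge implementation below is an assumption about its behavior, not the deleted code:

import json


def merge(target, source):
    # Illustrative stand-in for the deleted universal_api_utils.merge
    # (an assumption, not the original code): merge streamed values,
    # concatenating text fragments and recursing into containers.
    if isinstance(target, list) and isinstance(source, list):
        # Merge streamed choices pairwise by position.
        return [merge(t, s) for t, s in zip(target, source)]
    if isinstance(target, dict) and isinstance(source, dict):
        merged = dict(target)
        for key, value in source.items():
            merged[key] = merge(merged[key], value) if key in merged else value
        return merged
    if isinstance(target, str) and isinstance(source, str):
        return target + source  # concatenate streamed text fragments
    return source if source is not None else target


def assemble_from_sse(body: str) -> dict:
    # Mirrors the deleted streaming branch of on_chat_completion_message:
    # split the SSE payload into "data: ..." chunks and fold them into a
    # single non-streaming completion.
    chunks = [chunk.strip() for chunk in body.split("\n\ndata: ")]
    chunks[0] = chunks[0][chunks[0].find("data: ") + 6 :]
    if chunks[-1] == "[DONE]":
        chunks.pop()

    response_body = json.loads(chunks[-1])
    for chunk in chunks[:-1]:
        response_body["choices"] = merge(
            response_body["choices"], json.loads(chunk)["choices"]
        )

    # Rename the accumulated "delta" to "message", matching the
    # non-streaming response schema.
    for choice in response_body["choices"]:
        choice["message"] = choice.pop("delta")
    return response_body

This per-request reassembly is what the assembled_response field makes unnecessary: the service now just parses the pre-assembled payload, as the new module below shows.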
17 changes: 17 additions & 0 deletions aidial_analytics_realtime/log_request/message.py
@@ -0,0 +1,17 @@
import json


def get_assembled_response(message: dict) -> dict | None:
if (assembled_response_str := message.get("assembled_response")) is None:
return None

assembled_response = json.loads(assembled_response_str)

# NOTE: this transformation becomes redundant in ai-dial-core>=0.22.1
# due to the fix https://github.com/epam/ai-dial-core/pull/648
for choice in assembled_response.get("choices") or []:
if "delta" in choice:
choice["message"] = choice["delta"]
del choice["delta"]

return assembled_response
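
A minimal usage sketch of the new helper; the payload mirrors the test fixtures below, with illustrative values:

import json

from aidial_analytics_realtime.log_request.message import get_assembled_response

message = {
    "assembled_response": json.dumps(
        {
            "choices": [
                {
                    "index": 0,
                    "delta": {"role": "assistant", "content": "pong"},
                    "finish_reason": "stop",
                }
            ]
        }
    )
}

response_body = get_assembled_response(message)
# The streamed "delta" key has been renamed to "message".
assert response_body["choices"][0]["message"]["content"] == "pong"

# Log messages without an assembled response yield None.
assert get_assembled_response({}) is None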
39 changes: 0 additions & 39 deletions aidial_analytics_realtime/universal_api_utils.py

This file was deleted.

138 changes: 138 additions & 0 deletions tests/test_app.py
@@ -43,6 +43,29 @@ def test_chat_completion_plain_text():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -77,6 +100,29 @@ def test_chat_completion_plain_text():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-2",
"object": "chat.completion",
"created": 1700828102,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"po"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"ng"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -212,6 +258,29 @@ def test_chat_completion_list_content():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -287,6 +356,29 @@ def test_chat_completion_none_content():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "5",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"5"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -556,6 +648,29 @@ def test_data_request_with_new_format():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-1",
"object": "chat.completion",
"created": 1692214960,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -600,6 +715,29 @@ def test_data_request_with_new_format():
}
),
},
"assembled_response": json.dumps(
{
"id": "chatcmpl-2",
"object": "chat.completion",
"created": 1700828102,
"model": "gpt-4",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": "pong",
},
"finish_reason": "stop",
}
],
"usage": {
"completion_tokens": 189,
"prompt_tokens": 22,
"total_tokens": 211,
},
}
),
"response": {
"status": "200",
"body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',