diff --git a/aidial_analytics_realtime/app.py b/aidial_analytics_realtime/app.py
index f26fcd0..cbc9b52 100644
--- a/aidial_analytics_realtime/app.py
+++ b/aidial_analytics_realtime/app.py
@@ -21,10 +21,10 @@
     InfluxWriterAsync,
     create_influx_writer,
 )
+from aidial_analytics_realtime.log_request.message import get_assembled_response
 from aidial_analytics_realtime.rates import RatesCalculator
 from aidial_analytics_realtime.time import parse_time
 from aidial_analytics_realtime.topic_model import TopicModel
-from aidial_analytics_realtime.universal_api_utils import merge
 from aidial_analytics_realtime.utils.concurrency import cpu_task_executor
 from aidial_analytics_realtime.utils.logging import add_logger_prefix
 from aidial_analytics_realtime.utils.logging import app_logger as logger
@@ -91,6 +91,7 @@ async def on_chat_completion_message(
     timestamp: datetime,
     request: dict,
     response: dict,
+    response_body: dict | None,
     influx_writer: InfluxWriterAsync,
     topic_model: TopicModel,
     rates_calculator: RatesCalculator,
@@ -102,41 +103,12 @@ async def on_chat_completion_message(
     if response["status"] != "200":
         return
 
-    response_body = None
     request_body = None
     model: str | None = None
 
     if (request_body_str := request.get("body")) is not None:
-
         request_body = json.loads(request_body_str)
-        stream = request_body.get("stream", False)
-        model = request_body.get("model", deployment)
-
-        if stream:
-            body = response["body"]
-            chunks = body.split("\n\ndata: ")
-
-            chunks = [chunk.strip() for chunk in chunks]
-
-            chunks[0] = chunks[0][chunks[0].find("data: ") + 6 :]
-            if chunks[-1] == "[DONE]":
-                chunks.pop(len(chunks) - 1)
-
-            response_body = json.loads(chunks[-1])
-            for chunk in chunks[0 : len(chunks) - 1]:
-                chunk = json.loads(chunk)
-
-                response_body["choices"] = merge(
-                    response_body["choices"], chunk["choices"]
-                )
-
-            for i in range(len(response_body["choices"])):
-                response_body["choices"][i]["message"] = response_body[
-                    "choices"
-                ][i]["delta"]
-                del response_body["choices"][i]["delta"]
-        else:
-            response_body = json.loads(response["body"])
+        model = request_body.get("model") or deployment
 
     await on_message(
         influx_writer,
@@ -226,17 +198,15 @@ async def on_log_message(
     chat_id = message["chat"]["id"]
     user_hash = message["user"]["id"]
     user_title = message["user"]["title"]
 
-    upstream_url = (
-        response["upstream_uri"] if "upstream_uri" in response else ""
-    )
-
     timestamp = parse_time(request["time"])
-    token_usage = message.get("token_usage", None)
-    trace = message.get("trace", None)
-    parent_deployment = message.get("parent_deployment", None)
-    execution_path = message.get("execution_path", None)
-    deployment = message.get("deployment", "")
+    upstream_url = response.get("upstream_uri") or ""
+    token_usage = message.get("token_usage")
+    trace = message.get("trace")
+    parent_deployment = message.get("parent_deployment")
+    execution_path = message.get("execution_path")
+    deployment = message.get("deployment") or ""
+    response_body = get_assembled_response(message)
 
     if re.search(RATE_PATTERN, uri):
         await on_rate_message(
@@ -262,6 +232,7 @@
             timestamp,
             request,
             response,
+            response_body,
             influx_writer,
             topic_model,
             rates_calculator,
diff --git a/aidial_analytics_realtime/log_request/message.py b/aidial_analytics_realtime/log_request/message.py
new file mode 100644
index 0000000..ed8ef09
--- /dev/null
+++ b/aidial_analytics_realtime/log_request/message.py
@@ -0,0 +1,17 @@
+import json
+
+
+def get_assembled_response(message: dict) -> dict | None:
+    if (assembled_response_str := message.get("assembled_response")) is None:
+        return None
+
+    assembled_response = json.loads(assembled_response_str)
+
+    # NOTE: this transformation becomes redundant in ai-dial-core>=0.22.1
+    # due to the fix https://github.com/epam/ai-dial-core/pull/648
+    for choice in assembled_response.get("choices") or []:
+        if "delta" in choice:
+            choice["message"] = choice["delta"]
+            del choice["delta"]
+
+    return assembled_response
diff --git a/aidial_analytics_realtime/universal_api_utils.py b/aidial_analytics_realtime/universal_api_utils.py
deleted file mode 100644
index 17fdba8..0000000
--- a/aidial_analytics_realtime/universal_api_utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-def merge_str(target, source):
-    if target is None:
-        return source
-    else:
-        return target + source
-
-
-def merge_dicts(target, source):
-    for key, value in source.items():
-        if key in target:
-            if isinstance(value, int):
-                target[key] = value
-            elif isinstance(value, str):
-                target[key] = merge_str(target[key], value)
-            else:
-                merge(target[key], value)
-        else:
-            target[key] = value
-
-    return target
-
-
-def merge_lists(target, source):
-    for i in source:
-        index = i["index"]
-
-        if index < len(target):
-            merge(target[index], i)
-        else:
-            target.append(i)
-
-    return target
-
-
-def merge(target, source):
-    if isinstance(source, list):
-        return merge_lists(target, source)
-    if isinstance(source, dict):
-        return merge_dicts(target, source)
diff --git a/tests/test_app.py b/tests/test_app.py
index e100c64..c288010 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -43,6 +43,29 @@ def test_chat_completion_plain_text():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-1",
+                                "object": "chat.completion",
+                                "created": 1692214960,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "pong",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -77,6 +100,29 @@ def test_chat_completion_plain_text():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-2",
+                                "object": "chat.completion",
+                                "created": 1700828102,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "pong",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"po"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"content":"ng"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -212,6 +258,29 @@ def test_chat_completion_list_content():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-1",
+                                "object": "chat.completion",
+                                "created": 1692214960,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "pong",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -287,6 +356,29 @@ def test_chat_completion_none_content():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-1",
+                                "object": "chat.completion",
+                                "created": 1692214960,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "5",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"5"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -556,6 +648,29 @@ def test_data_request_with_new_format():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-1",
+                                "object": "chat.completion",
+                                "created": 1692214960,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "pong",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-1","object":"chat.completion.chunk","created":1692214960,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',
@@ -600,6 +715,29 @@ def test_data_request_with_new_format():
                                 }
                             ),
                         },
+                        "assembled_response": json.dumps(
+                            {
+                                "id": "chatcmpl-2",
+                                "object": "chat.completion",
+                                "created": 1700828102,
+                                "model": "gpt-4",
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "assistant",
+                                            "content": "pong",
+                                        },
+                                        "finish_reason": "stop",
+                                    }
+                                ],
+                                "usage": {
+                                    "completion_tokens": 189,
+                                    "prompt_tokens": 22,
+                                    "total_tokens": 211,
+                                },
+                            }
+                        ),
                         "response": {
                             "status": "200",
                             "body": 'data: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{"role":"assistant","content":"pong"},"finish_reason":null}]}\n\ndata: {"id":"chatcmpl-2","object":"chat.completion.chunk","created":1700828102,"model":"gpt-4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"completion_tokens":189,"prompt_tokens":22,"total_tokens":211}}\n\ndata: [DONE]\n',