fix(llmobs): fix content arg extraction for vertex ai integration #12034

Merged (8 commits, Jan 23, 2025)
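The underlying issue: GenerativeModel.generate_content accepts the prompt under the contents keyword, while ChatSession.send_message uses content, so a hard-coded lookup of "contents" misses chat prompts passed by keyword. A minimal sketch of the two call shapes, assuming the vertexai SDK as exercised in the tests below (illustrative only, not meant to hit a live endpoint):

```python
from vertexai.generative_models import GenerativeModel

llm = GenerativeModel("gemini-1.5-flash")

# Model-level call: the prompt keyword is `contents`.
llm.generate_content(contents="Why do bears hibernate?")

# Chat-level call: ChatSession.send_message takes `content` instead,
# which is the keyword the integration previously never looked up.
chat = llm.start_chat()
chat.send_message(content="Why do bears hibernate?")
```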
ddtrace/contrib/internal/vertexai/_utils.py (4 additions, 2 deletions)
@@ -49,6 +49,7 @@ def __iter__(self):
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
self._kwargs["is_chat"] = self.is_chat
self._dd_integration.llmobs_set_tags(
self._dd_span, args=self._args, kwargs=self._kwargs, response=self._chunks
)
@@ -80,6 +81,7 @@ async def __aiter__(self):
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
self._kwargs["is_chat"] = self.is_chat
self._dd_integration.llmobs_set_tags(
self._dd_span, args=self._args, kwargs=self._kwargs, response=self._chunks
)
@@ -177,13 +179,13 @@ def _tag_request_content(span, integration, content, content_idx):
tag_request_content_part_google("vertexai", span, integration, part, part_idx, content_idx)


- def tag_request(span, integration, instance, args, kwargs):
+ def tag_request(span, integration, instance, args, kwargs, is_chat):
"""Tag the generation span with request details.
Includes capturing generation configuration, system prompt, and messages.
"""
# instance is either a chat session or a model itself
model_instance = instance if isinstance(instance, GenerativeModel) else instance._model
- contents = get_argument_value(args, kwargs, 0, "contents")
+ contents = get_argument_value(args, kwargs, 0, "content" if is_chat else "contents")
history = _get_attr(instance, "_history", [])
if history:
if isinstance(contents, list):
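Why the fourth argument matters: get_argument_value falls back to a keyword lookup when the prompt is not passed positionally, so the name it searches for has to match the wrapped method. The helper is internal to ddtrace; the stand-in below is a simplified sketch of the assumed lookup order, not the real implementation (which may raise rather than return None for a missing argument):

```python
# Simplified, assumed stand-in for ddtrace's internal get_argument_value.
def get_argument_value(args, kwargs, position, kwarg_name):
    # A positionally supplied prompt is found regardless of its name...
    if len(args) > position:
        return args[position]
    # ...but a keyword-only prompt is found only when kwarg_name matches the
    # wrapped method: "content" for send_message, "contents" for generate_content.
    return kwargs.get(kwarg_name)
```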
ddtrace/contrib/internal/vertexai/patch.py (4 additions, 2 deletions)
@@ -64,7 +64,7 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
# history must be copied since it is modified during the LLM interaction
history = getattr(instance, "history", [])[:]
try:
- tag_request(span, integration, instance, args, kwargs)
+ tag_request(span, integration, instance, args, kwargs, is_chat)
generations = func(*args, **kwargs)
if stream:
return TracedVertexAIStreamResponse(
@@ -80,6 +80,7 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
if integration.is_pc_sampled_llmobs(span):
kwargs["instance"] = model_instance
kwargs["history"] = history
kwargs["is_chat"] = is_chat
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
span.finish()
return generations
@@ -99,7 +100,7 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
# history must be copied since it is modified during the LLM interaction
history = getattr(instance, "history", [])[:]
try:
- tag_request(span, integration, instance, args, kwargs)
+ tag_request(span, integration, instance, args, kwargs, is_chat)
generations = await func(*args, **kwargs)
if stream:
return TracedAsyncVertexAIStreamResponse(
@@ -115,6 +116,7 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
if integration.is_pc_sampled_llmobs(span):
kwargs["instance"] = model_instance
kwargs["history"] = history
kwargs["is_chat"] = is_chat
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
span.finish()
return generations
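Both the sync and async wrappers hand the new flag over the same way the existing instance and history values travel: stashed on kwargs just before llmobs_set_tags runs. A condensed view of that shared tail (names mirror the diff above; streaming and error handling are omitted, so this is a sketch rather than the actual wrapper body):

```python
# Condensed sketch of the non-streaming tail of _traced_generate/_traced_agenerate.
def _finish_llmobs(integration, span, model_instance, history, is_chat, args, kwargs, generations):
    if integration.is_pc_sampled_llmobs(span):
        kwargs["instance"] = model_instance
        kwargs["history"] = history
        kwargs["is_chat"] = is_chat  # new: lets the tagger pick "content" vs "contents"
        integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
    span.finish()
```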
ddtrace/llmobs/_integrations/vertexai.py (2 additions, 1 deletion)
@@ -42,10 +42,11 @@ def _llmobs_set_tags(
) -> None:
instance = kwargs.get("instance", None)
history = kwargs.get("history", [])
+ is_chat = kwargs.get("is_chat", False)
metadata = llmobs_get_metadata_google(kwargs, instance)

system_instruction = get_system_instructions_from_google_model(instance)
- input_contents = get_argument_value(args, kwargs, 0, "contents")
+ input_contents = get_argument_value(args, kwargs, 0, "content" if is_chat else "contents")
input_messages = self._extract_input_message(input_contents, history, system_instruction)

output_messages = [{"content": ""}]
New file (release note, 4 additions)
@@ -0,0 +1,4 @@
+ ---
+ fixes:
+ - |
+ vertexai: Fixes chat.send_message content keyword argument extraction.
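For reference, the user-facing scenario the release note describes, sketched under the assumption that the Vertex AI integration is enabled (for example via ddtrace-run or ddtrace.patch) and that valid Vertex AI credentials are configured; this is illustrative rather than a test:

```python
from ddtrace import patch

patch(vertexai=True)  # one assumed way to enable the integration

from vertexai.generative_models import GenerativeModel

chat = GenerativeModel("gemini-1.5-flash").start_chat()
# A prompt passed via the `content` keyword is now extracted and tagged on
# the LLM span; previously only a positionally passed prompt was picked up.
chat.send_message(content="Why do bears hibernate?")
```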
tests/contrib/vertexai/test_vertexai.py (4 additions, 4 deletions)
@@ -42,7 +42,7 @@ def test_vertexai_completion(vertexai):
llm = vertexai.generative_models.GenerativeModel("gemini-1.5-flash")
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
llm.generate_content(
"Why do bears hibernate?",
contents="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -118,7 +118,7 @@ def test_vertexai_completion_stream(vertexai):
(_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_STREAM_CHUNKS)
]
response = llm.generate_content(
"How big is the solar system?",
contents="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -278,7 +278,7 @@ def test_vertexai_chat(vertexai):
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
chat = llm.start_chat()
chat.send_message(
"Why do bears hibernate?",
content="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -371,7 +371,7 @@ def test_vertexai_chat_stream(vertexai):
]
chat = llm.start_chat()
response = chat.send_message(
"How big is the solar system?",
content="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
tests/contrib/vertexai/test_vertexai_llmobs.py (4 additions, 4 deletions)
@@ -21,7 +21,7 @@ def test_completion(self, vertexai, mock_llmobs_writer, mock_tracer):
llm = vertexai.generative_models.GenerativeModel("gemini-1.5-flash")
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
llm.generate_content(
"Why do bears hibernate?",
contents="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -126,7 +126,7 @@ def test_completion_stream(self, vertexai, mock_llmobs_writer, mock_tracer):
(_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_STREAM_CHUNKS)
]
response = llm.generate_content(
"How big is the solar system?",
contents="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -293,7 +293,7 @@ def test_chat(self, vertexai, mock_llmobs_writer, mock_tracer):
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
chat = llm.start_chat()
chat.send_message(
"Why do bears hibernate?",
content="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -389,7 +389,7 @@ def test_chat_stream(self, vertexai, mock_llmobs_writer, mock_tracer):
]
chat = llm.start_chat()
response = chat.send_message(
"How big is the solar system?",
content="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),