fix(llmobs): fix content arg extraction for vertex ai integration #12034

Merged (8 commits, Jan 23, 2025)
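The underlying issue: GenerativeModel.generate_content accepts the prompt under the contents keyword, while ChatSession.send_message uses content, so a hard-coded lookup of "contents" misses chat prompts passed by keyword. A minimal sketch of the two call shapes, assuming the vertexai SDK as exercised in the tests below (illustrative only, not meant to hit a live endpoint):

```python
from vertexai.generative_models import GenerativeModel

llm = GenerativeModel("gemini-1.5-flash")

# Model-level call: the prompt keyword is `contents`.
llm.generate_content(contents="Why do bears hibernate?")

# Chat-level call: ChatSession.send_message takes `content` instead,
# which is the keyword the integration previously never looked up.
chat = llm.start_chat()
chat.send_message(content="Why do bears hibernate?")
```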
ddtrace/contrib/internal/vertexai/_utils.py (4 additions, 2 deletions)
@@ -49,6 +49,7 @@ def __iter__(self):
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
self._kwargs["is_chat"] = self.is_chat
self._dd_integration.llmobs_set_tags(
self._dd_span, args=self._args, kwargs=self._kwargs, response=self._chunks
)
@@ -80,6 +81,7 @@ async def __aiter__(self):
if self._dd_integration.is_pc_sampled_llmobs(self._dd_span):
self._kwargs["instance"] = self._model_instance
self._kwargs["history"] = self._history
self._kwargs["is_chat"] = self.is_chat
self._dd_integration.llmobs_set_tags(
self._dd_span, args=self._args, kwargs=self._kwargs, response=self._chunks
)
@@ -177,13 +179,13 @@ def _tag_request_content(span, integration, content, content_idx):
tag_request_content_part_google("vertexai", span, integration, part, part_idx, content_idx)


- def tag_request(span, integration, instance, args, kwargs):
+ def tag_request(span, integration, instance, args, kwargs, is_chat):
"""Tag the generation span with request details.
Includes capturing generation configuration, system prompt, and messages.
"""
# instance is either a chat session or a model itself
model_instance = instance if isinstance(instance, GenerativeModel) else instance._model
- contents = get_argument_value(args, kwargs, 0, "contents")
+ contents = get_argument_value(args, kwargs, 0, "content" if is_chat else "contents")
history = _get_attr(instance, "_history", [])
if history:
if isinstance(contents, list):
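Why the fourth argument matters: get_argument_value falls back to a keyword lookup when the prompt is not passed positionally, so the name it searches for has to match the wrapped method. The helper is internal to ddtrace; the stand-in below is a simplified sketch of the assumed lookup order, not the real implementation (which may raise rather than return None for a missing argument):

```python
# Simplified, assumed stand-in for ddtrace's internal get_argument_value.
def get_argument_value(args, kwargs, position, kwarg_name):
    # A positionally supplied prompt is found regardless of its name...
    if len(args) > position:
        return args[position]
    # ...but a keyword-only prompt is found only when kwarg_name matches the
    # wrapped method: "content" for send_message, "contents" for generate_content.
    return kwargs.get(kwarg_name)
```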
ddtrace/contrib/internal/vertexai/patch.py (4 additions, 2 deletions)
@@ -64,7 +64,7 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
# history must be copied since it is modified during the LLM interaction
history = getattr(instance, "history", [])[:]
try:
- tag_request(span, integration, instance, args, kwargs)
+ tag_request(span, integration, instance, args, kwargs, is_chat)
generations = func(*args, **kwargs)
if stream:
return TracedVertexAIStreamResponse(
@@ -80,6 +80,7 @@ def _traced_generate(vertexai, pin, func, instance, args, kwargs, model_instance
if integration.is_pc_sampled_llmobs(span):
kwargs["instance"] = model_instance
kwargs["history"] = history
kwargs["is_chat"] = is_chat
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
span.finish()
return generations
@@ -99,7 +100,7 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
# history must be copied since it is modified during the LLM interaction
history = getattr(instance, "history", [])[:]
try:
- tag_request(span, integration, instance, args, kwargs)
+ tag_request(span, integration, instance, args, kwargs, is_chat)
generations = await func(*args, **kwargs)
if stream:
return TracedAsyncVertexAIStreamResponse(
@@ -115,6 +116,7 @@ async def _traced_agenerate(vertexai, pin, func, instance, args, kwargs, model_i
if integration.is_pc_sampled_llmobs(span):
kwargs["instance"] = model_instance
kwargs["history"] = history
kwargs["is_chat"] = is_chat
integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
span.finish()
return generations
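Both the sync and async wrappers hand the new flag over the same way the existing instance and history values travel: stashed on kwargs just before llmobs_set_tags runs. A condensed view of that shared tail (names mirror the diff above; streaming and error handling are omitted, so this is a sketch rather than the actual wrapper body):

```python
# Condensed sketch of the non-streaming tail of _traced_generate/_traced_agenerate.
def _finish_llmobs(integration, span, model_instance, history, is_chat, args, kwargs, generations):
    if integration.is_pc_sampled_llmobs(span):
        kwargs["instance"] = model_instance
        kwargs["history"] = history
        kwargs["is_chat"] = is_chat  # new: lets the tagger pick "content" vs "contents"
        integration.llmobs_set_tags(span, args=args, kwargs=kwargs, response=generations)
    span.finish()
```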
ddtrace/llmobs/_integrations/vertexai.py (2 additions, 1 deletion)
@@ -42,10 +42,11 @@ def _llmobs_set_tags(
) -> None:
instance = kwargs.get("instance", None)
history = kwargs.get("history", [])
+ is_chat = kwargs.get("is_chat", False)
metadata = llmobs_get_metadata_google(kwargs, instance)

system_instruction = get_system_instructions_from_google_model(instance)
- input_contents = get_argument_value(args, kwargs, 0, "contents")
+ input_contents = get_argument_value(args, kwargs, 0, "content" if is_chat else "contents")
input_messages = self._extract_input_message(input_contents, history, system_instruction)

output_messages = [{"content": ""}]
New file (release note, 4 additions)
@@ -0,0 +1,4 @@
+ ---
+ fixes:
+ - |
+ vertexai: Fixes chat.send_message content keyword argument extraction.
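For reference, the user-facing scenario the release note describes, sketched under the assumption that the Vertex AI integration is enabled (for example via ddtrace-run or ddtrace.patch) and that valid Vertex AI credentials are configured; this is illustrative rather than a test:

```python
from ddtrace import patch

patch(vertexai=True)  # one assumed way to enable the integration

from vertexai.generative_models import GenerativeModel

chat = GenerativeModel("gemini-1.5-flash").start_chat()
# A prompt passed via the `content` keyword is now extracted and tagged on
# the LLM span; previously only a positionally passed prompt was picked up.
chat.send_message(content="Why do bears hibernate?")
```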
tests/contrib/vertexai/test_vertexai.py (4 additions, 4 deletions)
@@ -42,7 +42,7 @@ def test_vertexai_completion(vertexai):
llm = vertexai.generative_models.GenerativeModel("gemini-1.5-flash")
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
llm.generate_content(
"Why do bears hibernate?",
contents="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -118,7 +118,7 @@ def test_vertexai_completion_stream(vertexai):
(_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_STREAM_CHUNKS)
]
response = llm.generate_content(
"How big is the solar system?",
contents="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -278,7 +278,7 @@ def test_vertexai_chat(vertexai):
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
chat = llm.start_chat()
chat.send_message(
"Why do bears hibernate?",
content="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -371,7 +371,7 @@ def test_vertexai_chat_stream(vertexai):
]
chat = llm.start_chat()
response = chat.send_message(
"How big is the solar system?",
content="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
tests/contrib/vertexai/test_vertexai_llmobs.py (4 additions, 4 deletions)
@@ -21,7 +21,7 @@ def test_completion(self, vertexai, mock_llmobs_writer, mock_tracer):
llm = vertexai.generative_models.GenerativeModel("gemini-1.5-flash")
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
llm.generate_content(
"Why do bears hibernate?",
contents="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -126,7 +126,7 @@ def test_completion_stream(self, vertexai, mock_llmobs_writer, mock_tracer):
(_mock_completion_stream_chunk(chunk) for chunk in MOCK_COMPLETION_STREAM_CHUNKS)
]
response = llm.generate_content(
"How big is the solar system?",
contents="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -293,7 +293,7 @@ def test_chat(self, vertexai, mock_llmobs_writer, mock_tracer):
llm._prediction_client.responses["generate_content"].append(_mock_completion_response(MOCK_COMPLETION_SIMPLE_1))
chat = llm.start_chat()
chat.send_message(
"Why do bears hibernate?",
content="Why do bears hibernate?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),
@@ -389,7 +389,7 @@ def test_chat_stream(self, vertexai, mock_llmobs_writer, mock_tracer):
]
chat = llm.start_chat()
response = chat.send_message(
"How big is the solar system?",
content="How big is the solar system?",
generation_config=vertexai.generative_models.GenerationConfig(
stop_sequences=["x"], max_output_tokens=30, temperature=1.0
),