Fix evaluator errors #1538

Merged 6 commits on Mar 3, 2025
4 changes: 4 additions & 0 deletions python/langsmith/client.py
@@ -1,4 +1,4 @@
"""Client for interacting with the LangSmith API.

[GitHub Actions benchmark annotation on python/langsmith/client.py: the benchmark suite ran on this PR and the comparison against main shows no meaningful change (geometric mean 1.02x faster; largest improvement 1.11x on dumps_pydanticv1_nested_50x100, largest regression 1.03x slower on create_10_000_run_trees).]

Use the client to customize API keys / workspace connections, SSL certs,
etc. for tracing.
@@ -5344,6 +5344,7 @@
run_id_ = res.target_run_id
elif run is not None:
run_id_ = run.id
error = res.extra.get("error", None) if res.extra is not None else None

_submit_feedback(
run_id=run_id_,
@@ -5361,6 +5362,7 @@
project_id=project_id,
extra=res.extra,
trace_id=run.trace_id if run else None,
error=error,
)
return results

@@ -5430,6 +5432,7 @@
feedback_group_id: Optional[ID_TYPE] = None,
extra: Optional[Dict] = None,
trace_id: Optional[ID_TYPE] = None,
error: Optional[bool] = None,
**kwargs: Any,
) -> ls_schemas.Feedback:
"""Create a feedback in the LangSmith API.
@@ -5547,6 +5550,7 @@
),
feedback_group_id=_ensure_uuid(feedback_group_id, accept_null=True),
extra=extra,
error=error,
)

use_multipart = (self.info.batch_ingest_config or {}).get(
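Taken together, the client.py hunks propagate an evaluator-failure flag into the feedback record: when a result's `extra` carries `"error": True`, the client now forwards `error=True` to `create_feedback`. A minimal sketch of that flow, assuming a result object shaped like the `EvaluationResult` built in the runners below (illustrative only, not the actual client internals):

```python
import uuid

from langsmith.evaluation.evaluator import EvaluationResult

# Hypothetical id for illustration only.
evaluator_run_id = uuid.uuid4()

# A failed evaluator produces a result like the one built in _runner.py below.
res = EvaluationResult(
    key="correctness",                 # hypothetical feedback key
    source_run_id=evaluator_run_id,    # the evaluator's own run
    comment=repr(ValueError("boom")),
    extra={"error": True},
)

# client.py derives the flag exactly as in the added line above...
error = res.extra.get("error", None) if res.extra is not None else None

# ...and forwards it to the feedback call, so the feedback row is marked as
# errored, e.g. client.create_feedback(..., error=error).
```

The new `error` field on `FeedbackCreate` (see schemas.py below) is what carries this flag to the API.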
6 changes: 4 additions & 2 deletions python/langsmith/evaluation/_arunner.py
@@ -983,10 +983,12 @@ async def _arun_evaluators(
eval_results = current_results["evaluation_results"]

async def _run_single_evaluator(evaluator):
evaluator_run_id = uuid.uuid4()
try:
evaluator_response = await evaluator.aevaluate_run(
evaluator_response = await evaluator.aevaluate_run( # type: ignore[call-arg]
run=run,
example=self._get_example_with_readers(example),
source_run_id=evaluator_run_id,
)
selected_results = self.client._select_eval_results(
evaluator_response
@@ -1005,7 +1007,7 @@ async def _run_single_evaluator(evaluator):
results=[
EvaluationResult(
key=key,
source_run_id=run.id,
Contributor:
not sure i follow this bit, does source run not refer to the run being evaluated?

Contributor Author:
yeah this is confusing. this is the run for the evaluator. basically the issue was that by setting it to the parent run, when you were in langsmith in the experiment view and clicked on a failed evaluator run (or what should have been the evaluator run) it just opened up the target run. This fixes that.

source_run_id=evaluator_run_id,
comment=repr(e),
extra={"error": True},
)
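To make the discussion above concrete, here is a hedged sketch of the error branch shared by _arunner.py (above) and _runner.py (below). The key change is that the run id is minted before the evaluator is invoked, passed in as `source_run_id`, and reused when the evaluator raises, so the error feedback points at the evaluator's own run rather than the target run. The helper name and feedback key are hypothetical:

```python
import uuid

from langsmith.evaluation.evaluator import EvaluationResult


def run_one_evaluator(evaluator, run, example):
    """Hypothetical helper mirroring the error branch in _runner.py."""
    evaluator_run_id = uuid.uuid4()  # minted before the evaluator runs
    try:
        return evaluator.evaluate_run(
            run=run,                          # the target run being evaluated
            example=example,
            source_run_id=evaluator_run_id,   # evaluator trace reuses this id
        )
    except Exception as e:
        # Before this PR the fallback used source_run_id=run.id, so clicking a
        # failed evaluator in the experiment view opened the target run instead.
        return EvaluationResult(
            key="correctness",                # hypothetical feedback key
            source_run_id=evaluator_run_id,   # now points at the evaluator's own run
            comment=repr(e),
            extra={"error": True},
        )
```

The async path in _arunner.py applies the same pattern with `aevaluate_run`.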
6 changes: 4 additions & 2 deletions python/langsmith/evaluation/_runner.py
@@ -1630,10 +1630,12 @@ def _run_evaluators(
example = current_results["example"]
eval_results = current_results["evaluation_results"]
for evaluator in evaluators:
evaluator_run_id = uuid.uuid4()
try:
evaluator_response = evaluator.evaluate_run(
evaluator_response = evaluator.evaluate_run( # type: ignore[call-arg]
run=run,
example=example,
source_run_id=evaluator_run_id,
)

eval_results["results"].extend(
@@ -1652,7 +1654,7 @@
results=[
EvaluationResult(
key=key,
source_run_id=run.id,
source_run_id=evaluator_run_id,
comment=repr(e),
extra={"error": True},
)
18 changes: 14 additions & 4 deletions python/langsmith/evaluation/evaluator.py
@@ -307,7 +307,10 @@ def is_async(self) -> bool:
return hasattr(self, "afunc")

def evaluate_run(
self, run: Run, example: Optional[Example] = None
self,
run: Run,
example: Optional[Example] = None,
source_run_id: Optional[uuid.UUID] = None,
Contributor:
can we give this a diff name, like evaluator_run_id?

) -> Union[EvaluationResult, EvaluationResults]:
"""Evaluate a run using the wrapped function.

@@ -329,7 +332,8 @@ def evaluate_run(
)
else:
return running_loop.run_until_complete(self.aevaluate_run(run, example))
source_run_id = uuid.uuid4()
if source_run_id is None:
source_run_id = uuid.uuid4()
metadata: Dict[str, Any] = {"target_run_id": run.id}
if getattr(run, "session_id", None):
metadata["experiment"] = str(run.session_id)
@@ -340,7 +344,12 @@
)
return self._format_result(result, source_run_id)

async def aevaluate_run(self, run: Run, example: Optional[Example] = None):
async def aevaluate_run(
self,
run: Run,
example: Optional[Example] = None,
source_run_id: Optional[uuid.UUID] = None,
):
"""Evaluate a run asynchronously using the wrapped async function.

This method directly invokes the wrapped async function with the
@@ -356,7 +365,8 @@ async def aevaluate_run(self, run: Run, example: Optional[Example] = None):
"""
if not hasattr(self, "afunc"):
return await super().aevaluate_run(run, example)
source_run_id = uuid.uuid4()
if source_run_id is None:
source_run_id = uuid.uuid4()
metadata: Dict[str, Any] = {"target_run_id": run.id}
if getattr(run, "session_id", None):
metadata["experiment"] = str(run.session_id)
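The evaluator.py change keeps the new parameter optional: callers that pass nothing still get a freshly generated UUID, while the experiment runners can pass the id they minted up front so the evaluator trace and any error feedback share it. A tiny sketch of that default logic, using a hypothetical helper name rather than the real method bodies:

```python
import uuid
from typing import Optional


def resolve_source_run_id(source_run_id: Optional[uuid.UUID] = None) -> uuid.UUID:
    """Mirror the 'if source_run_id is None' fallback added to (a)evaluate_run."""
    return source_run_id if source_run_id is not None else uuid.uuid4()


# Existing callers that pass nothing are unaffected:
assert isinstance(resolve_source_run_id(), uuid.UUID)

# The runners pin the id up front and reuse it for error feedback:
pinned = uuid.uuid4()
assert resolve_source_run_id(pinned) == pinned
```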
2 changes: 2 additions & 0 deletions python/langsmith/schemas.py
@@ -643,6 +643,8 @@ class FeedbackCreate(FeedbackBase):
feedback_source: FeedbackSourceBase
"""The source of the feedback."""
feedback_config: Optional[FeedbackConfig] = None
"""The config for the feedback"""
error: Optional[bool] = None


class Feedback(FeedbackBase):