Merge branch 'master' into master

ZenGuard-AI · Jun 21, 2024 · e8cf89d · e8cf89d
2 parents 57a3a55 + 9eda8f2
commit e8cf89d
Show file tree

Hide file tree

Showing 2 changed files with 107 additions and 120 deletions.
diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb
@@ -7,6 +7,19 @@
    "source": [
     "# How to trim messages\n",
     "\n",
+    ":::info Prerequisites\n",
+    "\n",
+    "This guide assumes familiarity with the following concepts:\n",
+    "\n",
+    "- [Messages](/docs/concepts/#messages)\n",
+    "- [Chat models](/docs/concepts/#chat-models)\n",
+    "- [Chaining](/docs/how_to/sequence/)\n",
+    "- [Chat history](/docs/concepts/#chat-history)\n",
+    "\n",
+    "The methods in this guide also require `langchain-core>=0.2.9`.\n",
+    "\n",
+    ":::\n",
+    "\n",
     "All models have finite context windows, meaning there's a limit to how many tokens they can take as input. If you have very long messages or a chain/agent that accumulates a long message is history, you'll need to manage the length of the messages you're passing in to the model.\n",
     "\n",
     "The `trim_messages` util provides some basic strategies for trimming a list of messages to be of a certain token length.\n",

diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
@@ -585,163 +585,137 @@ def dummy_token_counter(messages: List[BaseMessage]) -> int:
                 return count
 
         First 30 tokens, not allowing partial messages:
-        .. code-block:: python
+            .. code-block:: python
 
-            trim_messages(messages, max_tokens=30, token_counter=dummy_token_counter, strategy="first")
+                trim_messages(messages, max_tokens=30, token_counter=dummy_token_counter, strategy="first")
 
-        .. code-block:: python
+            .. code-block:: python
 
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
-            ]
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
+                ]
 
         First 30 tokens, allowing partial messages:
-        .. code-block:: python
+            .. code-block:: python
 
-            trim_messages(
-                messages,
-                max_tokens=30,
-                token_counter=dummy_token_counter,
-                strategy="first",
-                allow_partial=True,
-            )
+                trim_messages(
+                    messages,
+                    max_tokens=30,
+                    token_counter=dummy_token_counter,
+                    strategy="first",
+                    allow_partial=True,
+                )
 
-        .. code-block:: python
+            .. code-block:: python
 
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
-                AIMessage( [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"),
-            ]
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
+                    AIMessage( [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"),
+                ]
 
         First 30 tokens, allowing partial messages, have to end on HumanMessage:
-        .. code-block:: python
+            .. code-block:: python
 
-            trim_messages(
-                messages,
-                max_tokens=30,
-                token_counter=dummy_token_counter,
-                strategy="first"
-                allow_partial=True,
-                end_on="human",
-            )
+                trim_messages(
+                    messages,
+                    max_tokens=30,
+                    token_counter=dummy_token_counter,
+                    strategy="first",
+                    allow_partial=True,
+                    end_on="human",
+                )
 
-        .. code-block:: python
+            .. code-block:: python
 
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
-            ]
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"),
+                ]
 
 
         Last 30 tokens, including system message, not allowing partial messages:
-        .. code-block:: python
+            .. code-block:: python
 
-            trim_messages(messages, max_tokens=30, include_system=True, token_counter=dummy_token_counter, strategy="last")
+                trim_messages(messages, max_tokens=30, include_system=True, token_counter=dummy_token_counter, strategy="last")
 
-        .. code-block:: python
+            .. code-block:: python
 
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
-                AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
-            ]
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
+                    AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
+                ]
 
         Last 40 tokens, including system message, allowing partial messages:
-        .. code-block:: python
-
-            trim_messages(
-                messages,
-                max_tokens=40,
-                token_counter=dummy_token_counter,
-                strategy="last",
-                allow_partial=True,
-                include_system=True
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                AIMessage(
-                    [{"type": "text", "text": "This is the FIRST 4 token block."},],
-                    id="second",
-                ),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
-                AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
-            ]
-
-        Last 30 tokens, including system message, allowing partial messages, end on HumanMessage:
-        .. code-block:: python
-
-            trim_messages(
-                messages,
-                max_tokens=30,
-                token_counter=dummy_token_counter,
-                strategy="last",
-                end_on="human",
-                include_system=True,
-                allow_partial=True,
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                AIMessage(
-                    [{"type": "text", "text": "This is the FIRST 4 token block."},],
-                    id="second",
-                ),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
-            ]
-
-
-        Last 40 tokens, including system message, allowing partial messages, start on HumanMessage:
-        .. code-block:: python
-
-            trim_messages(
-                messages,
-                max_tokens=40,
-                token_counter=dummy_token_counter,
-                strategy="last",
-                include_system=True,
-                allow_partial=True,
-                start_on="human"
-            )
-
-        .. code-block:: python
-
-            [
-                SystemMessage("This is a 4 token text. The full message is 10 tokens."),
-                HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
-                AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
-            ]
-
-        Using a TextSplitter for splitting parting messages:
             .. code-block:: python
 
-                ...
+                trim_messages(
+                    messages,
+                    max_tokens=40,
+                    token_counter=dummy_token_counter,
+                    strategy="last",
+                    allow_partial=True,
+                    include_system=True
+                )
 
             .. code-block:: python
 
-                ...
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    AIMessage(
+                        [{"type": "text", "text": "This is the FIRST 4 token block."},],
+                        id="second",
+                    ),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
+                    AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
+                ]
 
-        Using a model for token counting:
+        Last 30 tokens, including system message, allowing partial messages, end on HumanMessage:
             .. code-block:: python
 
-                ...
+                trim_messages(
+                    messages,
+                    max_tokens=30,
+                    token_counter=dummy_token_counter,
+                    strategy="last",
+                    end_on="human",
+                    include_system=True,
+                    allow_partial=True,
+                )
 
             .. code-block:: python
 
-                ...
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    AIMessage(
+                        [{"type": "text", "text": "This is the FIRST 4 token block."},],
+                        id="second",
+                    ),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
+                ]
 
-        Chaining:
+        Last 40 tokens, including system message, allowing partial messages, start on HumanMessage:
             .. code-block:: python
 
-                ...
+                trim_messages(
+                    messages,
+                    max_tokens=40,
+                    token_counter=dummy_token_counter,
+                    strategy="last",
+                    include_system=True,
+                    allow_partial=True,
+                    start_on="human"
+                )
 
+            .. code-block:: python
 
+                [
+                    SystemMessage("This is a 4 token text. The full message is 10 tokens."),
+                    HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"),
+                    AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"),
+                ]
     """  # noqa: E501
     from langchain_core.language_models import BaseLanguageModel