From 86326269a1109b92b060fcee57bffb82146402c8 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Fri, 21 Jun 2024 10:09:41 -0700 Subject: [PATCH 1/2] docs[patch]: Adds prereqs to trim messages (#23270) CC @baskaryan --- docs/docs/how_to/trim_messages.ipynb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb index 9aa61fed9c8f8..e82c126f61282 100644 --- a/docs/docs/how_to/trim_messages.ipynb +++ b/docs/docs/how_to/trim_messages.ipynb @@ -7,6 +7,19 @@ "source": [ "# How to trim messages\n", "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Messages](/docs/concepts/#messages)\n", + "- [Chat models](/docs/concepts/#chat-models)\n", + "- [Chaining](/docs/how_to/sequence/)\n", + "- [Chat history](/docs/concepts/#chat-history)\n", + "\n", + "The methods in this guide also require `langchain-core>=0.2.9`.\n", + "\n", + ":::\n", + "\n", "All models have finite context windows, meaning there's a limit to how many tokens they can take as input. 
If you have very long messages or a chain/agent that accumulates a long message is history, you'll need to manage the length of the messages you're passing in to the model.\n", "\n", "The `trim_messages` util provides some basic strategies for trimming a list of messages to be of a certain token length.\n", From 9eda8f2fe89806c8f31aae4ce36eeef615278dc1 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:15:31 -0700 Subject: [PATCH 2/2] docs: fix trim_messages code blocks (#23271) --- libs/core/langchain_core/messages/utils.py | 214 +++++++++------------ 1 file changed, 94 insertions(+), 120 deletions(-) diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 263bc05fb7114..520ae13322467 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -585,163 +585,137 @@ def dummy_token_counter(messages: List[BaseMessage]) -> int: return count First 30 tokens, not allowing partial messages: - .. code-block:: python + .. code-block:: python - trim_messages(messages, max_tokens=30, token_counter=dummy_token_counter, strategy="first") + trim_messages(messages, max_tokens=30, token_counter=dummy_token_counter, strategy="first") - .. code-block:: python + .. code-block:: python - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), - ] + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), + ] First 30 tokens, allowing partial messages: - .. code-block:: python + .. 
code-block:: python - trim_messages( - messages, - max_tokens=30, - token_counter=dummy_token_counter, - strategy="first", - allow_partial=True, - ) + trim_messages( + messages, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="first", + allow_partial=True, + ) - .. code-block:: python + .. code-block:: python - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), - AIMessage( [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"), - ] + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), + AIMessage( [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second"), + ] First 30 tokens, allowing partial messages, have to end on HumanMessage: - .. code-block:: python + .. code-block:: python - trim_messages( - messages, - max_tokens=30, - token_counter=dummy_token_counter, - strategy="first" - allow_partial=True, - end_on="human", - ) + trim_messages( + messages, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="first", + allow_partial=True, + end_on="human", + ) - .. code-block:: python + .. code-block:: python - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), - ] + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="first"), + ] Last 30 tokens, including system message, not allowing partial messages: - .. code-block:: python + .. 
code-block:: python - trim_messages(messages, max_tokens=30, include_system=True, token_counter=dummy_token_counter, strategy="last") + trim_messages(messages, max_tokens=30, include_system=True, token_counter=dummy_token_counter, strategy="last") - .. code-block:: python + .. code-block:: python - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), - AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), - ] + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), + AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), + ] Last 40 tokens, including system message, allowing partial messages: - .. code-block:: python - - trim_messages( - messages, - max_tokens=40, - token_counter=dummy_token_counter, - strategy="last", - allow_partial=True, - include_system=True - ) - - .. code-block:: python - - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - AIMessage( - [{"type": "text", "text": "This is the FIRST 4 token block."},], - id="second", - ), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), - AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), - ] - - Last 30 tokens, including system message, allowing partial messages, end on HumanMessage: - .. code-block:: python - - trim_messages( - messages, - max_tokens=30, - token_counter=dummy_token_counter, - strategy="last", - end_on="human", - include_system=True, - allow_partial=True, - ) - - .. code-block:: python - - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - AIMessage( - [{"type": "text", "text": "This is the FIRST 4 token block."},], - id="second", - ), - HumanMessage("This is a 4 token text. 
The full message is 10 tokens.", id="third"), - ] - - - Last 40 tokens, including system message, allowing partial messages, start on HumanMessage: - .. code-block:: python - - trim_messages( - messages, - max_tokens=40, - token_counter=dummy_token_counter, - strategy="last", - include_system=True, - allow_partial=True, - start_on="human" - ) - - .. code-block:: python - - [ - SystemMessage("This is a 4 token text. The full message is 10 tokens."), - HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), - AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), - ] - - Using a TextSplitter for splitting parting messages: .. code-block:: python - ... + trim_messages( + messages, + max_tokens=40, + token_counter=dummy_token_counter, + strategy="last", + allow_partial=True, + include_system=True + ) .. code-block:: python - ... + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + AIMessage( + [{"type": "text", "text": "This is the FIRST 4 token block."},], + id="second", + ), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), + AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), + ] - Using a model for token counting: + Last 30 tokens, including system message, allowing partial messages, end on HumanMessage: .. code-block:: python - ... + trim_messages( + messages, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="last", + end_on="human", + include_system=True, + allow_partial=True, + ) .. code-block:: python - ... + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + AIMessage( + [{"type": "text", "text": "This is the FIRST 4 token block."},], + id="second", + ), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), + ] - Chaining: + Last 40 tokens, including system message, allowing partial messages, start on HumanMessage: .. 
code-block:: python - ... + trim_messages( + messages, + max_tokens=40, + token_counter=dummy_token_counter, + strategy="last", + include_system=True, + allow_partial=True, + start_on="human" + ) + .. code-block:: python + [ + SystemMessage("This is a 4 token text. The full message is 10 tokens."), + HumanMessage("This is a 4 token text. The full message is 10 tokens.", id="third"), + AIMessage("This is a 4 token text. The full message is 10 tokens.", id="fourth"), + ] """ # noqa: E501 from langchain_core.language_models import BaseLanguageModel