From cb654a3245d1baa621e8871b3ee47af2259fe069 Mon Sep 17 00:00:00 2001
From: Jacob Lee
Date: Thu, 13 Jun 2024 02:26:55 -0700
Subject: [PATCH] docs[patch]: Adds multimodal column to chat models table, move up in concepts (#22837)

CC @hwchase17 @baskaryan
---
 docs/docs/concepts.mdx           | 18 ++++++++++--------
 docs/scripts/model_feat_table.py |  8 ++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/docs/docs/concepts.mdx b/docs/docs/concepts.mdx
index 8188a80e08a33..d87bec22707ab 100644
--- a/docs/docs/concepts.mdx
+++ b/docs/docs/concepts.mdx
@@ -155,6 +155,16 @@ Please see the [tool calling section](/docs/concepts/#functiontool-calling) for
 
 For specifics on how to use chat models, see the [relevant how-to guides here](/docs/how_to/#chat-models).
 
+#### Multimodality
+
+Some chat models are multimodal, accepting images, audio, and even video as inputs. These are still less common, meaning model providers haven't standardized on the "best" way to define the API. Multimodal **outputs** are even less common. As such, we've kept our multimodal abstractions fairly lightweight and plan to further solidify the multimodal APIs and interaction patterns as the field matures.
+
+In LangChain, most chat models that support multimodal inputs also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini, which support video and other bytes input, the APIs also support the native, model-specific representations.
+
+For specifics on how to use multimodal models, see the [relevant how-to guides here](/docs/how_to/#multimodal).
+
+For a full list of LangChain model providers with multimodal models, [check out this table](/docs/integrations/chat/#advanced-features).
+
 ### LLMs
 
@@ -514,14 +524,6 @@ If you are still using AgentExecutor, do not fear: we still have a guide on [how
 It is recommended, however, that you start to transition to LangGraph. In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
 
-### Multimodal
-
-Some models are multimodal, accepting images, audio and even video as inputs. These are still less common, meaning model providers haven't standardized on the "best" way to define the API. Multimodal **outputs** are even less common. As such, we've kept our multimodal abstractions fairly light weight and plan to further solidify the multimodal APIs and interaction patterns as the field matures.
-
-In LangChain, most chat models that support multimodal inputs also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini which support video and other bytes input, the APIs also support the native, model-specific representations.
-
-For specifics on how to use multimodal models, see the [relevant how-to guides here](/docs/how_to/#multimodal).
-
 ### Callbacks
 
 LangChain provides a callbacks system that allows you to hook into the various stages of your LLM application. This is useful for logging, monitoring, streaming, and other tasks.
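For context, the OpenAI-style content-block format that the new Multimodality section references looks roughly like the sketch below. This is a minimal example, not part of the patch, assuming `langchain-openai` is installed, `OPENAI_API_KEY` is set, and a local `example.jpg` exists; the model name is a placeholder:

```python
import base64

from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

# Read and base64-encode a local image (placeholder file).
with open("example.jpg", "rb") as f:
    image_data = base64.b64encode(f.read()).decode("utf-8")

model = ChatOpenAI(model="gpt-4o")

# Content blocks: a list of typed dicts instead of a plain string.
message = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
        },
    ]
)

response = model.invoke([message])
print(response.content)
```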
diff --git a/docs/scripts/model_feat_table.py b/docs/scripts/model_feat_table.py
index f4236b6014dc2..9f7623fd46072 100644
--- a/docs/scripts/model_feat_table.py
+++ b/docs/scripts/model_feat_table.py
@@ -18,6 +18,7 @@
     "ChatAnthropic": {
         "tool_calling": True,
         "structured_output": True,
+        "multimodal": True,
         "package": "langchain-anthropic",
         "link": "/docs/integrations/chat/anthropic/",
     },
@@ -39,6 +40,7 @@
         "tool_calling": True,
         "structured_output": True,
         "json_mode": True,
+        "multimodal": True,
         "package": "langchain-openai",
         "link": "/docs/integrations/chat/azure_chat_openai/",
     },
@@ -46,6 +48,7 @@
         "tool_calling": True,
         "structured_output": True,
         "json_mode": True,
+        "multimodal": True,
         "package": "langchain-openai",
         "link": "/docs/integrations/chat/openai/",
     },
@@ -59,11 +62,13 @@
     "ChatVertexAI": {
         "tool_calling": True,
         "structured_output": True,
+        "multimodal": True,
         "package": "langchain-google-vertexai",
         "link": "/docs/integrations/chat/google_vertex_ai_palm/",
     },
     "ChatGoogleGenerativeAI": {
         "tool_calling": True,
+        "multimodal": True,
         "package": "langchain-google-genai",
         "link": "/docs/integrations/chat/google_generative_ai/",
     },
@@ -138,6 +143,7 @@ sidebar_class_name: hidden
 keywords: [compatibility, bind_tools, tool calling, function calling, structured output, with_structured_output, json mode, local model]
 custom_edit_url:
+hide_table_of_contents: true
 ---
 
 # Chat models
 
@@ -213,6 +219,7 @@ def get_chat_model_table() -> str:
         "structured_output",
         "json_mode",
         "local",
+        "multimodal",
         "package",
     ]
     title = [
@@ -221,6 +228,7 @@ def get_chat_model_table() -> str:
         "[Structured output](/docs/how_to/structured_output/)",
         "JSON mode",
         "Local",
+        "[Multimodal](/docs/how_to/multimodal_inputs/)",
         "Package",
     ]
     rows = [title, [":-"] + [":-:"] * (len(title) - 1)]
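As a side note on the last hunk above: `rows = [title, [":-"] + [":-:"] * (len(title) - 1)]` builds the markdown alignment row, so the first column is left-aligned and every other column (including the new Multimodal one) is centered. A minimal standalone sketch with abbreviated titles, not the script's exact output:

```python
# Header titles (abbreviated; the real script uses linked titles).
title = ["Model", "Tool calling", "Multimodal", "Package"]
# First column left-aligned (":-"), remaining columns centered (":-:").
rows = [title, [":-"] + [":-:"] * (len(title) - 1)]
print("\n".join("|" + "|".join(row) + "|" for row in rows))
# Output:
# |Model|Tool calling|Multimodal|Package|
# |:-|:-:|:-:|:-:|
```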