Commit

Merge branch 'langchain-ai:master' into master
yaksh0nti authored Jun 15, 2024
2 parents 43cebea + 570d45b commit c8ff6a8
Showing 133 changed files with 600 additions and 233,640 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/scheduled_test.yml
@@ -31,7 +31,6 @@ jobs:
- "libs/partners/google-vertexai"
- "libs/partners/google-genai"
- "libs/partners/aws"
- "libs/partners/nvidia-ai-endpoints"

steps:
- uses: actions/checkout@v4
@@ -41,10 +40,6 @@ jobs:
with:
repository: langchain-ai/langchain-google
path: langchain-google
- uses: actions/checkout@v4
with:
repository: langchain-ai/langchain-nvidia
path: langchain-nvidia
- uses: actions/checkout@v4
with:
repository: langchain-ai/langchain-cohere
@@ -59,11 +54,9 @@ jobs:
rm -rf \
langchain/libs/partners/google-genai \
langchain/libs/partners/google-vertexai \
langchain/libs/partners/nvidia-ai-endpoints \
langchain/libs/partners/cohere
mv langchain-google/libs/genai langchain/libs/partners/google-genai
mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
mv langchain-nvidia/libs/ai-endpoints langchain/libs/partners/nvidia-ai-endpoints
mv langchain-cohere/libs/cohere langchain/libs/partners/cohere
mv langchain-aws/libs/aws langchain/libs/partners/aws
@@ -123,7 +116,6 @@ jobs:
rm -rf \
langchain/libs/partners/google-genai \
langchain/libs/partners/google-vertexai \
langchain/libs/partners/nvidia-ai-endpoints \
langchain/libs/partners/cohere \
langchain/libs/partners/aws
11 changes: 9 additions & 2 deletions docs/docs/integrations/tools/python.ipynb
@@ -9,7 +9,14 @@
"\n",
"Sometimes, for complex calculations, rather than have an LLM generate the answer directly, it can be better to have the LLM generate code to calculate the answer, and then run that code to get the answer. In order to easily do that, we provide a simple Python REPL to execute commands in.\n",
"\n",
"This interface will only return things that are printed - therefore, if you want to use it to calculate an answer, make sure to have it print out the answer."
"This interface will only return things that are printed - therefore, if you want to use it to calculate an answer, make sure to have it print out the answer.\n",
"\n",
"\n",
":::{.callout-caution}\n",
"Python REPL can execute arbitrary code on the host machine (e.g., delete files, make network requests). Use with caution.\n",
"\n",
"For more information general security guidelines, please see https://python.langchain.com/v0.2/docs/security/.\n",
":::"
]
},
{
@@ -95,7 +102,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.4"
}
},
"nbformat": 4,
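For readers skimming the diff, here is a minimal sketch of the print-only contract the added caution refers to, assuming `langchain-experimental` is installed (the package where `PythonREPL` now lives):

```python
from langchain_experimental.utilities import PythonREPL

repl = PythonREPL()
# The REPL returns only captured stdout, so the answer must be printed.
output = repl.run("print(17 * 23)")
print(output)  # -> "391"
```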
1 change: 1 addition & 0 deletions docs/scripts/model_feat_table.py
@@ -98,6 +98,7 @@
},
"ChatOllama": {
"local": True,
"json_mode": True,
"package": "langchain-community",
"link": "/docs/integrations/chat/ollama/",
},
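The `json_mode: True` flag advertises Ollama's JSON mode in the model feature table. As a hedged sketch of what that capability looks like in code, assuming a local Ollama server with a `llama2` model pulled:

```python
from langchain_community.chat_models import ChatOllama

# format="json" asks the Ollama server to constrain output to valid JSON,
# the capability the feature table now advertises.
llm = ChatOllama(model="llama2", format="json")
print(llm.invoke("List three primary colors as a JSON array.").content)
```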
14 changes: 14 additions & 0 deletions docs/src/theme/ChatModelTabs.js
@@ -10,13 +10,15 @@ import CodeBlock from "@theme-original/CodeBlock";
* @property {string} [anthropicParams] - Parameters for Anthropic chat model. Defaults to `model="claude-3-sonnet-20240229"`
* @property {string} [cohereParams] - Parameters for Cohere chat model. Defaults to `model="command-r"`
* @property {string} [fireworksParams] - Parameters for Fireworks chat model. Defaults to `model="accounts/fireworks/models/mixtral-8x7b-instruct"`
* @property {string} [groqParams] - Parameters for Groq chat model. Defaults to `model="llama3-8b-8192"`
* @property {string} [mistralParams] - Parameters for Mistral chat model. Defaults to `model="mistral-large-latest"`
* @property {string} [googleParams] - Parameters for Google chat model. Defaults to `model="gemini-pro"`
* @property {string} [togetherParams] - Parameters for Together chat model. Defaults to `model="mistralai/Mixtral-8x7B-Instruct-v0.1"`
* @property {boolean} [hideOpenai] - Whether or not to hide OpenAI chat model.
* @property {boolean} [hideAnthropic] - Whether or not to hide Anthropic chat model.
* @property {boolean} [hideCohere] - Whether or not to hide Cohere chat model.
* @property {boolean} [hideFireworks] - Whether or not to hide Fireworks chat model.
* @property {boolean} [hideGroq] - Whether or not to hide Groq chat model.
* @property {boolean} [hideMistral] - Whether or not to hide Mistral chat model.
* @property {boolean} [hideGoogle] - Whether or not to hide Google VertexAI chat model.
* @property {boolean} [hideTogether] - Whether or not to hide Together chat model.
@@ -33,6 +35,7 @@ export default function ChatModelTabs(props) {
anthropicParams,
cohereParams,
fireworksParams,
groqParams,
mistralParams,
googleParams,
togetherParams,
@@ -41,6 +44,7 @@
hideAnthropic,
hideCohere,
hideFireworks,
hideGroq,
hideMistral,
hideGoogle,
hideTogether,
@@ -55,6 +59,7 @@
const fireworksParamsOrDefault =
fireworksParams ??
`model="accounts/fireworks/models/mixtral-8x7b-instruct"`;
const groqParamsOrDefault = groqParams ?? `model="llama3-8b-8192"`;
const mistralParamsOrDefault =
mistralParams ?? `model="mistral-large-latest"`;
const googleParamsOrDefault = googleParams ?? `model="gemini-pro"`;
@@ -122,6 +127,15 @@ export default function ChatModelTabs(props) {
default: false,
shouldHide: hideFireworks,
},
{
value: "Groq",
label: "Groq",
text: `from langchain_groq import ChatGroq\n\n${llmVarName} = ChatGroq(${groqParamsOrDefault})`,
apiKeyName: "GROQ_API_KEY",
packageName: "langchain-groq",
default: false,
shouldHide: hideGroq,
},
{
value: "MistralAI",
label: "MistralAI",
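The new Groq tab renders Python along these lines. A sketch of the generated snippet, assuming `langchain-groq` is installed and `GROQ_API_KEY` is set in the environment:

```python
from langchain_groq import ChatGroq

# Mirrors the tab's default parameters: model="llama3-8b-8192"
llm = ChatGroq(model="llama3-8b-8192")
print(llm.invoke("Hello!").content)
```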
1 change: 0 additions & 1 deletion libs/cli/langchain_cli/constants.py
@@ -1,4 +1,3 @@
DEFAULT_GIT_REPO = "https://github.com/langchain-ai/langchain.git"
DEFAULT_GIT_REF = "langserve-templates"
DEFAULT_GIT_SUBDIRECTORY = "templates"
DEFAULT_GIT_REF = "master"
@@ -39,7 +39,7 @@
@deprecated(
since="0.0.37",
removal="0.3",
alternative_import=("from langchain_huggingface import ChatHuggingFace"),
alternative_import="langchain_huggingface.ChatHuggingFace",
)
class ChatHuggingFace(BaseChatModel):
"""
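The fix matters because `alternative_import` is rendered into the deprecation warning as a dotted path, not a `from ...` statement. The suggested replacement import, as a sketch assuming the `langchain-huggingface` partner package is installed:

```python
# Replacement for the deprecated community class:
from langchain_huggingface import ChatHuggingFace
```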
@@ -42,7 +42,7 @@ class ConfluenceLoader(BaseLoader):
You can also specify a boolean `include_attachments` to include attachments; this
is set to False by default. If set to True, all attachments will be downloaded and
ConfluenceReader will extract the text from the attachments and add it to the
ConfluenceLoader will extract the text from the attachments and add it to the
Document object. Currently supported attachment types are: PDF, PNG, JPEG/JPG,
SVG, Word and Excel.
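A usage sketch of the attachment behavior the docstring describes; the URL and credentials are placeholders, and `include_attachments` is assumed to be a constructor argument in this version:

```python
from langchain_community.document_loaders import ConfluenceLoader

loader = ConfluenceLoader(
    url="https://example.atlassian.net/wiki",  # placeholder instance
    username="me@example.com",                 # placeholder credentials
    api_key="...",
    space_key="DOCS",
    include_attachments=True,  # extract text from PDF/PNG/JPEG/SVG/Word/Excel
)
docs = loader.load()
```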
28 changes: 23 additions & 5 deletions libs/community/langchain_community/document_loaders/sitemap.py
@@ -1,6 +1,16 @@
import itertools
import re
from typing import Any, Callable, Generator, Iterable, Iterator, List, Optional, Tuple
from typing import (
Any,
Callable,
Dict,
Generator,
Iterable,
Iterator,
List,
Optional,
Tuple,
)
from urllib.parse import urlparse

from langchain_core.documents import Document
@@ -75,6 +85,7 @@ def __init__(
is_local: bool = False,
continue_on_failure: bool = False,
restrict_to_same_domain: bool = True,
max_depth: int = 10,
**kwargs: Any,
):
"""Initialize with webpage path and optional filter URLs.
@@ -105,6 +116,7 @@ def __init__(
restrict_to_same_domain: whether to restrict loading to URLs to the same
domain as the sitemap. Attention: This is only applied if the sitemap
is not a local file!
max_depth: maximum depth to follow sitemap links. Default: 10
"""

if blocksize is not None and blocksize < 1:
@@ -134,17 +146,23 @@ def __init__(
self.blocknum = blocknum
self.is_local = is_local
self.continue_on_failure = continue_on_failure
self.max_depth = max_depth

def parse_sitemap(self, soup: Any) -> List[dict]:
def parse_sitemap(self, soup: Any, *, depth: int = 0) -> List[dict]:
"""Parse sitemap xml and load into a list of dicts.
Args:
soup: BeautifulSoup object.
depth: current depth of the sitemap. Default: 0
Returns:
List of dicts.
"""
els = []
if depth >= self.max_depth:
return []

els: List[Dict] = []

for url in soup.find_all("url"):
loc = url.find("loc")
if not loc:
@@ -177,9 +195,9 @@ def parse_sitemap(self, soup: Any) -> List[dict]:
loc = sitemap.find("loc")
if not loc:
continue
soup_child = self.scrape_all([loc.text], "xml")[0]

els.extend(self.parse_sitemap(soup_child))
soup_child = self.scrape_all([loc.text], "xml")[0]
els.extend(self.parse_sitemap(soup_child, depth=depth + 1))
return els

def lazy_load(self) -> Iterator[Document]:
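With the new `max_depth` guard, a sitemap index that links to further sitemap indexes can no longer recurse without bound. A usage sketch, with a placeholder sitemap URL:

```python
from langchain_community.document_loaders.sitemap import SitemapLoader

loader = SitemapLoader(
    "https://example.com/sitemap.xml",  # placeholder; may be a sitemap index
    max_depth=2,  # follow nested <sitemap> links at most two levels deep
)
docs = loader.load()
```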
@@ -42,4 +42,4 @@ def transform_documents(
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
return _litm_reordering(list(documents))
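With this change, the async path of `LongContextReorder` mirrors the sync one instead of raising `NotImplementedError`. A minimal sketch:

```python
import asyncio

from langchain_community.document_transformers import LongContextReorder
from langchain_core.documents import Document

docs = [Document(page_content=f"doc {i}") for i in range(5)]
# Previously this raised NotImplementedError; now it applies the same
# "lost in the middle" reordering as the sync transform_documents().
reordered = asyncio.run(LongContextReorder().atransform_documents(docs))
print([d.page_content for d in reordered])
```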
18 changes: 4 additions & 14 deletions libs/community/langchain_community/llms/ollama.py
@@ -112,15 +112,16 @@ class _OllamaCommon(BaseLanguageModel):
"""Timeout for the request stream"""

keep_alive: Optional[Union[int, str]] = None
"""How long the model will stay loaded into memory.
"""How long the model will stay loaded into memory."""

raw: Optional[bool] = None
"""raw or not.""
The parameter (Default: 5 minutes) can be set to:
1. a duration string in Golang (such as "10m" or "24h");
2. a number in seconds (such as 3600);
3. any negative number which will keep the model loaded \
in memory (e.g. -1 or "-1m");
4. 0 which will unload the model immediately after generating a response;
See the [Ollama documents](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately)"""

headers: Optional[dict] = None
@@ -154,6 +155,7 @@ def _default_params(self) -> Dict[str, Any]:
"system": self.system,
"template": self.template,
"keep_alive": self.keep_alive,
"raw": self.raw,
}

@property
@@ -227,7 +229,6 @@ def _create_stream(
"images": payload.get("images", []),
**params,
}

response = requests.post(
url=api_url,
headers={
@@ -369,12 +370,9 @@ async def _astream_with_aggregation(

class Ollama(BaseLLM, _OllamaCommon):
"""Ollama locally runs large language models.
To use, follow the instructions at https://ollama.ai/.
Example:
.. code-block:: python
from langchain_community.llms import Ollama
ollama = Ollama(model="llama2")
"""
@@ -398,17 +396,13 @@ def _generate( # type: ignore[override]
**kwargs: Any,
) -> LLMResult:
"""Call out to Ollama's generate endpoint.
Args:
prompt: The prompt to pass into the model.
stop: Optional list of stop words to use when generating.
Returns:
The string generated by the model.
Example:
.. code-block:: python
response = ollama("Tell me a joke.")
"""
# TODO: add caching here.
@@ -434,17 +428,13 @@ async def _agenerate( # type: ignore[override]
**kwargs: Any,
) -> LLMResult:
"""Call out to Ollama's generate endpoint.
Args:
prompt: The prompt to pass into the model.
stop: Optional list of stop words to use when generating.
Returns:
The string generated by the model.
Example:
.. code-block:: python
response = ollama("Tell me a joke.")
"""
# TODO: add caching here.
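Taken together, these hunks plumb the new `raw` field through `_default_params` into the request payload, alongside the existing `keep_alive` option. A usage sketch, assuming a local Ollama server with a `llama2` model pulled:

```python
from langchain_community.llms import Ollama

llm = Ollama(
    model="llama2",
    keep_alive="10m",  # keep the model in memory for 10 minutes after the call
    raw=True,          # newly forwarded: bypass Ollama's prompt templating
)
print(llm.invoke("Tell me a joke."))
```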
18 changes: 13 additions & 5 deletions libs/community/langchain_community/utilities/__init__.py
@@ -122,9 +122,6 @@
from langchain_community.utilities.pubmed import (
PubMedAPIWrapper,
)
from langchain_community.utilities.python import (
PythonREPL,
)
from langchain_community.utilities.rememberizer import RememberizerAPIWrapper
from langchain_community.utilities.requests import (
Requests,
@@ -215,7 +212,6 @@
"Portkey",
"PowerBIDataset",
"PubMedAPIWrapper",
"PythonREPL",
"RememberizerAPIWrapper",
"Requests",
"RequestsWrapper",
@@ -279,7 +275,6 @@
"Portkey": "langchain_community.utilities.portkey",
"PowerBIDataset": "langchain_community.utilities.powerbi",
"PubMedAPIWrapper": "langchain_community.utilities.pubmed",
"PythonREPL": "langchain_community.utilities.python",
"RememberizerAPIWrapper": "langchain_community.utilities.rememberizer",
"Requests": "langchain_community.utilities.requests",
"RequestsWrapper": "langchain_community.utilities.requests",
@@ -302,8 +297,21 @@
"ZapierNLAWrapper": "langchain_community.utilities.zapier",
}

REMOVED = {
"PythonREPL": (
"PythonREPL has been deprecated from langchain_community "
"due to being flagged by security scanners. See: "
"https://github.com/langchain-ai/langchain/issues/14345 "
"If you need to use it, please use the version "
"from langchain_experimental. "
"from langchain_experimental.utilities.python import PythonREPL."
)
}


def __getattr__(name: str) -> Any:
if name in REMOVED:
raise AssertionError(REMOVED[name])
if name in _module_lookup:
module = importlib.import_module(_module_lookup[name])
return getattr(module, name)
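The effect of the `REMOVED` guard is that the old import now fails loudly with migration guidance instead of resolving (module-level `__getattr__` intercepts the lookup per PEP 562). A sketch of both sides, assuming `langchain-experimental` is installed:

```python
# The old location now raises with pointers to the replacement:
try:
    from langchain_community.utilities import PythonREPL  # noqa: F401
except AssertionError as err:
    print(err)

# The supported location going forward:
from langchain_experimental.utilities.python import PythonREPL
```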