Commit e96359c: Merge branch 'master' into master
yaksh0nti authored Jun 20, 2024
2 parents 9e68444 + a349fce commit e96359c
Showing 12 changed files with 216 additions and 38 deletions.
11 changes: 11 additions & 0 deletions docs/docs/how_to/document_loader_pdf.ipynb
@@ -69,6 +69,17 @@
"Once we have loaded PDFs into LangChain `Document` objects, we can index them (e.g., a RAG application) in the usual way:"
]
},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c3b932bb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "%pip install faiss-cpu\n",
+  "# use `pip install faiss-gpu` for CUDA GPU support"
+ ]
+},
{
"cell_type": "code",
"execution_count": null,
6 changes: 5 additions & 1 deletion docs/docs/how_to/functions.ipynb
@@ -300,7 +300,11 @@
"id": "922b48bd",
"metadata": {},
"source": [
"# Streaming\n",
"## Streaming\n",
"\n",
":::{.callout-note}\n",
"[RunnableLambda](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html) is best suited for code that does not need to support streaming. If you need to support streaming (i.e., be able to operate on chunks of inputs and yield chunks of outputs), use [RunnableGenerator](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableGenerator.html) instead as in the example below.\n",
":::\n",
"\n",
"You can use generator functions (ie. functions that use the `yield` keyword, and behave like iterators) in a chain.\n",
"\n",
13 changes: 13 additions & 0 deletions docs/docs/integrations/document_loaders/microsoft_powerpoint.ipynb
@@ -12,6 +12,19 @@
"This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
]
},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aef1500f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "# Install packages\n",
+  "%pip install unstructured\n",
+  "%pip install python-magic\n",
+  "%pip install python-pptx"
+ ]
+},
{
"cell_type": "code",
"execution_count": 1,
10 changes: 2 additions & 8 deletions docs/docs/tutorials/qa_chat_history.ipynb
@@ -322,7 +322,7 @@
"\n",
"Now we can build our full QA chain. This is as simple as updating the retriever to be our new `history_aware_retriever`.\n",
"\n",
"Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
"Again, we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer. A more detailed explaination is over [here](/docs/tutorials/rag/#built-in-chains)\n",
"\n",
"We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
]
@@ -760,13 +760,6 @@
"id": "931c4fe3-c603-4efb-9b37-5f7cbbb1cbbd",
"metadata": {},
"outputs": [
-{
- "name": "stderr",
- "output_type": "stream",
- "text": [
-  "Error in LangChainTracer.on_tool_end callback: TracerException(\"Found chain run at ID 0ec120e2-b1fc-4593-9fee-2dd4f4cae256, but expected {'tool'} run.\")\n"
- ]
-},
{
"data": {
"text/plain": [
@@ -1030,6 +1023,7 @@
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
"from langgraph.prebuilt import create_react_agent\n",
"\n",
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
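For readers following along, the pieces described above assemble roughly like this. A sketch, assuming `llm` and `history_aware_retriever` were constructed earlier in the tutorial:

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the question using the following context:\n\n{context}"),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

result = rag_chain.invoke({"input": "What is Task Decomposition?", "chat_history": []})
# result contains "input", "chat_history", "context", and "answer" keys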
3 changes: 2 additions & 1 deletion docs/scripts/generate_api_reference_links.py
@@ -24,7 +24,7 @@

 _CURRENT_PATH = Path(__file__).parent.absolute()
 # Directory where generated markdown files are stored
-_DOCS_DIR = _CURRENT_PATH / "docs"
+_DOCS_DIR = _CURRENT_PATH.parent.parent / "docs"


def find_files(path):
@@ -75,6 +75,7 @@ def main():

     for file in find_files(args.docs_dir):
         file_imports = replace_imports(file)
+        print(file)

if file_imports:
# Use relative file path as key
16 changes: 14 additions & 2 deletions libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -89,7 +89,13 @@ def __init__(

     def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
         """Lazily parse the blob."""
-        import pypdf
+        try:
+            import pypdf
+        except ImportError:
+            raise ImportError(
+                "`pypdf` package not found, please install it with "
+                "`pip install pypdf`"
+            )
 
         with blob.as_bytes_io() as pdf_file_obj:  # type: ignore[attr-defined]
             pdf_reader = pypdf.PdfReader(pdf_file_obj, password=self.password)
@@ -144,7 +150,13 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[valid-ty
"""Lazily parse the blob."""

if not self.extract_images:
from pdfminer.high_level import extract_text
try:
from pdfminer.high_level import extract_text
except ImportError:
raise ImportError(
"`pdfminer` package not found, please install it with "
"`pip install pdfminer.six`"
)

with blob.as_bytes_io() as pdf_file_obj: # type: ignore[attr-defined]
if self.concatenate_pages:
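With the guarded imports above, a missing dependency now fails with an installation hint instead of a bare ImportError. A usage sketch (the file name is a placeholder):

from langchain_community.document_loaders.parsers.pdf import PyPDFParser
from langchain_core.documents.base import Blob

parser = PyPDFParser()
blob = Blob.from_path("example.pdf")  # hypothetical local file
docs = list(parser.lazy_parse(blob))  # one Document per page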
4 changes: 2 additions & 2 deletions libs/community/langchain_community/utilities/sql_database.py
@@ -201,10 +201,10 @@ def from_databricks(
             from dbruntime.databricks_repl_context import get_context
 
             context = get_context()
-            default_host = context.browserHostName
+            default_host = context.browserHostName if context else None
         except ImportError:
-            pass
+            default_host = None
 
         if host is None:
             host = get_from_env("host", "DATABRICKS_HOST", default_host)
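With the fix, `default_host` is resolved safely whether or not a Databricks REPL context is present; outside Databricks the host and token come from arguments or environment variables. A hedged usage sketch (all values are placeholders, and the Databricks SQL connector must be installed):

from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_databricks(
    catalog="samples",
    schema="nyctaxi",
    host="adb-1234567890123456.7.azuredatabricks.net",  # placeholder; or set DATABRICKS_HOST
    api_token="dapi-xxxx",  # placeholder; or set DATABRICKS_TOKEN
    warehouse_id="abcdef1234567890",  # placeholder
)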

36 changes: 22 additions & 14 deletions libs/core/langchain_core/runnables/base.py
@@ -1357,6 +1357,7 @@ def with_alisteners(
         Example:
             .. code-block:: python
+
                 from langchain_core.runnables import RunnableLambda
                 import time
@@ -3388,6 +3389,7 @@ async def agen(input: AsyncIterator[Any]) -> AsyncIterator[str]:
     RunnableGenerator makes it easy to implement custom behavior within a streaming
     context. Below we show an example:
 
         .. code-block:: python
+
             from langchain_core.prompts import ChatPromptTemplate
@@ -4384,12 +4386,15 @@ def with_listeners(
             Union[Callable[[Run], None], Callable[[Run, RunnableConfig], None]]
         ] = None,
     ) -> RunnableEach[Input, Output]:
-        """
-        Bind lifecycle listeners to a Runnable, returning a new Runnable.
+        """Bind lifecycle listeners to a Runnable, returning a new Runnable.
 
-        on_start: Called before the runnable starts running, with the Run object.
-        on_end: Called after the runnable finishes running, with the Run object.
-        on_error: Called if the runnable throws an error, with the Run object.
+        Args:
+            on_start: Called before the runnable starts running, with the Run object.
+            on_end: Called after the runnable finishes running, with the Run object.
+            on_error: Called if the runnable throws an error, with the Run object.
+
+        Returns:
+            A new Runnable with the listeners bound.
 
         The Run object contains information about the run, including its id,
         type, input, output, error, start_time, end_time, and any tags or metadata
@@ -4408,15 +4413,18 @@ def with_alisteners(
         on_end: Optional[AsyncListener] = None,
         on_error: Optional[AsyncListener] = None,
     ) -> RunnableEach[Input, Output]:
-        """
-        Bind async lifecycle listeners to a Runnable, returning a new Runnable.
-        on_start: Called asynchronously before the runnable starts running,
-            with the Run object.
-        on_end: Called asynchronously after the runnable finishes running,
-            with the Run object.
-        on_error: Called asynchronously if the runnable throws an error,
-            with the Run object.
+        """Bind async lifecycle listeners to a Runnable, returning a new Runnable.
+
+        Args:
+            on_start: Called asynchronously before the runnable starts running,
+                with the Run object.
+            on_end: Called asynchronously after the runnable finishes running,
+                with the Run object.
+            on_error: Called asynchronously if the runnable throws an error,
+                with the Run object.
+
+        Returns:
+            A new Runnable with the listeners bound.
 
         The Run object contains information about the run, including its id,
         type, input, output, error, start_time, end_time, and any tags or metadata
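The restructured docstrings describe the listener hooks; in practice they attach like this. A minimal sketch on a plain RunnableLambda rather than RunnableEach:

from langchain_core.runnables import RunnableLambda
from langchain_core.tracers.schemas import Run


def on_start(run: Run) -> None:
    print("started:", run.id)


def on_end(run: Run) -> None:
    print("finished:", run.id)


chain = RunnableLambda(lambda x: x + 1).with_listeners(on_start=on_start, on_end=on_end)
chain.invoke(1)  # prints the start and end Run ids around the call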
9 changes: 9 additions & 0 deletions libs/partners/pinecone/langchain_pinecone/vectorstores.py
@@ -118,6 +118,7 @@ def add_texts(
         embedding_chunk_size: int = 1000,
         *,
         async_req: bool = True,
+        id_prefix: Optional[str] = None,
         **kwargs: Any,
     ) -> List[str]:
         """Run more texts through the embeddings and add to the vectorstore.
@@ -133,6 +134,7 @@ def add_texts(
             namespace: Optional pinecone namespace to add the texts to.
             batch_size: Batch size to use when adding the texts to the vectorstore.
             embedding_chunk_size: Chunk size to use when embedding the texts.
+            id_prefix: Optional string to use as an ID prefix when upserting vectors.
 
         Returns:
             List of ids from adding the texts into the vectorstore.
@@ -143,6 +145,10 @@ def add_texts(

         texts = list(texts)
         ids = ids or [str(uuid.uuid4()) for _ in texts]
+        if id_prefix:
+            ids = [
+                id_prefix + "#" + id if id_prefix + "#" not in id else id for id in ids
+            ]
         metadatas = metadatas or [{} for _ in texts]
         for metadata, text in zip(metadatas, texts):
             metadata[self._text_key] = text
@@ -406,6 +412,8 @@ def from_texts(
         upsert_kwargs: Optional[dict] = None,
         pool_threads: int = 4,
         embeddings_chunk_size: int = 1000,
+        *,
+        id_prefix: Optional[str] = None,
         **kwargs: Any,
     ) -> PineconeVectorStore:
         """Construct Pinecone wrapper from raw documents.
@@ -445,6 +453,7 @@ def from_texts(
             namespace=namespace,
             batch_size=batch_size,
             embedding_chunk_size=embeddings_chunk_size,
+            id_prefix=id_prefix,
             **(upsert_kwargs or {}),
         )
         return pinecone
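Given the `id_prefix + "#" + id` join in add_texts above, the new parameter behaves as follows. A short sketch, assuming `vectorstore` is an already-constructed PineconeVectorStore:

ids = vectorstore.add_texts(
    ["doc one", "doc two"],
    id_prefix="user-123",  # stored ids become "user-123#<uuid>"
    async_req=False,
)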
129 changes: 121 additions & 8 deletions libs/partners/pinecone/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libs/partners/pinecone/pyproject.toml
@@ -14,7 +14,7 @@ license = "MIT"
# <3.13 is due to restriction in pinecone-client package
python = ">=3.8.1,<3.13"
langchain-core = ">=0.1.52,<0.3"
pinecone-client = "^3.2.2"
pinecone-client = ">=3.2.2,<5"
# Support Python 3.8 and 3.12+.
numpy = [
{version = "^1", python = "<3.12"},
15 changes: 14 additions & 1 deletion libs/partners/pinecone/tests/unit_tests/test_vectorstores.py
@@ -1,6 +1,6 @@
from unittest.mock import Mock

-from langchain_pinecone.vectorstores import Pinecone
+from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


def test_initialization() -> None:
@@ -10,3 +10,16 @@ def test_initialization() -> None:
     embedding = Mock()
     text_key = "xyz"
     Pinecone(index, embedding, text_key)
+
+
+def test_id_prefix() -> None:
+    """Test integration of the id_prefix parameter."""
+    embedding = Mock()
+    embedding.embed_documents = Mock(return_value=[0.1, 0.2, 0.3, 0.4, 0.5])
+    index = Mock()
+    index.upsert = Mock(return_value=None)
+    text_key = "testing"
+    vectorstore = PineconeVectorStore(index, embedding, text_key)
+    texts = ["alpha", "beta", "gamma", "delta", "epsilon"]
+    id_prefix = "testing_prefixes"
+    vectorstore.add_texts(texts, id_prefix=id_prefix, async_req=False)
