diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b5e96a012..953e696c3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12']
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -36,11 +36,12 @@
         run: |
           uv venv
           uv build
+          uv pip install ".[dev]" ".[tools]"
       - name: Run tests
         run: |
           source .venv/bin/activate
-          uv run pytest --asyncio-mode=auto --maxfail=1 --disable-warnings tests/
+          uv run pytest --maxfail=1 --disable-warnings tests/
       - name: Check code style and compatibility
         run: |
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f631027be..617c144dd 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12']
     steps:
       - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
@@ -29,11 +29,12 @@
         run: |
           uv venv
           uv build
+          uv pip install ".[dev]" ".[tools]"
       - name: Run tests
         run: |
           source .venv/bin/activate
-          uv run pytest --asyncio-mode=auto --maxfail=1 --disable-warnings tests/
+          uv run pytest --maxfail=1 --disable-warnings tests/
 
   deploy:
     needs: test
diff --git a/README.md b/README.md
index 48b654d77..90a8e86e0 100644
--- a/README.md
+++ b/README.md
@@ -23,13 +23,33 @@ LionAGI is a robust framework for orchestrating multi-step AI operations with pr
 ```
 pip install lionagi
 ```
+```
+poetry add lionagi
+```
+```
+uv add lionagi
+```
+
+### Optional extras
+
+To install the `tools` extras:
+```
+uv pip install "lionagi[tools]"
+```
+
+To install the `dev` extras:
+```
+uv pip install "lionagi[dev]"
+```
 
 Dependencies:
- • litellm
- • jinja2
- • pandas
- • pillow
- • python-dotenv
+- aiohttp
+- aiocache
+- jinja2
+- pandas
+- pillow
+- pydantic
+- python-dotenv
 
 ## Quick Start
 ```python
diff --git a/docs/modules/tools.rst b/docs/modules/tools.rst
new file mode 100644
index 000000000..1f68325aa
--- /dev/null
+++ b/docs/modules/tools.rst
@@ -0,0 +1,217 @@
+.. _lionagi-tools:
+
+================================================
+Tools System
+================================================
+.. module:: lionagi.tools
+   :synopsis: Provides a collection of tools for file operations, code execution, and document management.
+
+Overview
+--------
+The **tools** system in LionAGI provides a collection of specialized tools that inherit from
+:class:`LionTool`. Each tool is designed to handle specific operations like file reading/writing,
+code execution in sandboxes, or document management. The system includes:
+
+- :class:`ReaderTool`: For reading and chunking documents
+- :class:`WriterTool`: For writing and saving files with restrictions
+- :class:`CoderTool`: For sandbox code execution and file operations
+
+Contents
+--------
+.. contents::
+   :local:
+   :depth: 2
+
+
+Base Tool
+---------
+.. class:: LionTool
+   :module: lionagi.tools.base
+
+   **Abstract Base Class**: The foundation for all tools in LionAGI.
+
+   Attributes
+   ----------
+   is_lion_system_tool : bool
+       Flag indicating this is a system tool (True by default).
+   system_tool_name : str
+       The name used to identify this tool in the system.
+
+   Methods
+   -------
+   .. 
method:: to_tool() -> Tool + :abstractmethod: + + Must be implemented by subclasses to return a :class:`Tool` instance + that wraps this tool's functionality. + + +ReaderTool +---------- +.. class:: ReaderTool + :module: lionagi.tools.reader + + **Inherits from**: :class:`LionTool` + + A tool for reading, searching, and chunking documents. Supports operations like: + + - Opening files/URLs and converting to text + - Reading partial slices of documents + - Searching for substrings + - Splitting documents into chunks + - Managing multiple documents in memory + + The tool uses a document converter to handle various file formats and stores documents + in memory for efficient access. + + Actions + ------- + - **open**: Convert a file/URL to text and store for partial reads + - **read**: Return a partial slice of an opened document + - **search**: Find substrings in the document text + - **list_docs**: Show currently opened documents + - **close**: Remove a document from memory + - **chunk_doc**: Split text into memory chunks + - **list_chunks**: View chunk metadata + - **read_chunk**: Get a specific chunk + - **read_chunks**: Get multiple chunks + + Example + ------- + .. code-block:: python + + from lionagi.tools import ReaderTool + + reader = ReaderTool() + # Open a document + response = reader.handle_request({ + "action": "open", + "path_or_url": "example.txt" + }) + doc_id = response.doc_info.doc_id + + # Read a portion + chunk = reader.handle_request({ + "action": "read", + "doc_id": doc_id, + "start_offset": 0, + "end_offset": 100 + }) + + +WriterTool +---------- +.. class:: WriterTool + :module: lionagi.tools.writer + + **Inherits from**: :class:`LionTool` + + A tool for writing and saving files, with built-in safety restrictions on + where files can be written. Supports: + + - Creating/opening documents for writing + - Writing or appending text (with offset control) + - Saving files to disk (in allowed directories only) + - Managing multiple documents in memory + + The tool enforces a root directory restriction for all disk writes to prevent + unauthorized file access. + + Actions + ------- + - **open**: Create or open a document for writing + - **write**: Write/append text with optional offsets + - **list_docs**: Show documents in memory + - **close**: Remove a document + - **save_file**: Save text to disk (restricted) + - **save_chunks**: Save chunk objects to disk + + Example + ------- + .. code-block:: python + + from lionagi.tools import WriterTool + + writer = WriterTool(allowed_root="./data") + # Create a new document + response = writer.handle_request({ + "action": "open", + "path": "newfile.txt" + }) + doc_id = response.doc_info.doc_id + + # Write some content + writer.handle_request({ + "action": "write", + "doc_id": doc_id, + "content": "Hello, world!" + }) + + +CoderTool +--------- +.. class:: CoderTool + :module: lionagi.tools.coder + + **Inherits from**: :class:`LionTool` + + A tool that combines sandbox code execution (via E2B) with local file operations. + Provides: + + - E2B sandbox management for safe code execution + - Package installation in sandboxes + - File upload/download to/from sandboxes + - Local file string replacement + - Fuzzy file finding in Git repositories + - Shell command execution + + The tool requires an E2B API key for sandbox operations. 
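+
+Like the other tools, a configured ``CoderTool`` is handed to the rest of the
+framework by calling :meth:`to_tool`, which returns a :class:`Tool` wrapping
+the tool's functionality. A minimal sketch (the API key value is a
+placeholder):
+
+.. code-block:: python
+
+    from lionagi.tools import CoderTool
+
+    coder = CoderTool(e2b_api_key="your-key")
+    tool = coder.to_tool()  # Tool instance wrapping this tool's functionality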
+ + Actions + ------- + **Sandbox Operations**: + - **start_sandbox**: Create new E2B sandbox + - **stop_sandbox**: Stop existing sandbox + - **list_sandboxes**: Show active sandboxes + - **run_code**: Execute code in sandbox + - **install_pkg**: Install packages (pip/npm/apt) + - **upload_file**: Send file to sandbox + - **download_file**: Get file from sandbox + + **Local Operations**: + - **file_str_replace**: Replace strings in files + - **fuzzy_find**: Search files with fuzzy matching + - **shell_command**: Run local shell commands + + Example + ------- + .. code-block:: python + + from lionagi.tools import CoderTool + + coder = CoderTool(e2b_api_key="your-key") + # Start a sandbox + response = coder.handle_request({ + "action": "start_sandbox" + }) + sandbox_id = response.sandbox_id + + # Run some Python code + coder.handle_request({ + "action": "run_code", + "sandbox_id": sandbox_id, + "code": "print('Hello from sandbox!')", + "language": "python" + }) + + +File Locations +------------- +- **base.py**: The :class:`LionTool` abstract base class +- **reader.py**: The :class:`ReaderTool` implementation +- **writer.py**: The :class:`WriterTool` implementation +- **coder.py**: The :class:`CoderTool` implementation +- **types.py**: Tool exports and type definitions + +``Copyright (c) 2023 - 2024, HaiyangLi `` +``SPDX-License-Identifier: Apache-2.0`` diff --git a/lionagi/libs/file/chunk.py b/lionagi/libs/file/chunk.py index 096204f4f..ac951a782 100644 --- a/lionagi/libs/file/chunk.py +++ b/lionagi/libs/file/chunk.py @@ -6,81 +6,72 @@ from collections.abc import Callable from typing import Any, Literal +from pydantic import BaseModel -def chunk_by_chars( - text: str, chunk_size: int = 2048, overlap: float = 0, threshold: int = 256 -) -> list[str]: + +class Chunk(BaseModel): + """ + Represents a partial slice of text read from [start_offset..end_offset). """ - Split a text into chunks of approximately equal size, with optional overlap. - This function divides the input text into chunks based on the specified - chunk size. It handles different scenarios based on the number of chunks - required and provides options for overlap between chunks. + start_offset: int | None = None + end_offset: int | None = None + chunk_content: str | None = None + chunk_size: int | None = None + chunk_id: int | None = None + total_chunks: int | None = None + metadata: dict[str, Any] | None = None - Args: - text (str): The input text to be chunked. - chunk_size (int, optional): The target size for each chunk. Defaults to 2048. - overlap (float, optional): The fraction of overlap between chunks. Defaults to 0. - threshold (int, optional): The minimum size for the last chunk. Defaults to 256. - Returns: - List[str]: A list of text chunks. +def chunk_by_chars( + text: str, chunk_size: int = 2048, overlap: float = 0, threshold: int = 256 +) -> list[tuple[str, int, int]]: + """ + Split a string into chunks of approximately chunk_size in length, + with optional overlap, returning a list of (subtext, start_offset, end_offset). - Raises: - ValueError: If an error occurs during the chunking process. + The last chunk is appended only if it has at least 'threshold' characters, + otherwise it is merged with the previous chunk. - Examples: - >>> text = "This is a sample text for chunking." 
- >>> chunks = chunk_by_chars(text, chunk_size=10, overlap=0.2) - >>> print(chunks) - ['This is a ', 'a sample ', 'le text fo', 'for chunki', 'king.'] + Returns: + list[tuple[str, int, int]]: A list of (chunk_text, start_offset, end_offset) """ - try: - n_chunks = math.ceil(len(text) / chunk_size) - overlap_size = int(chunk_size * overlap / 2) - - if n_chunks == 1: - return [text] - elif n_chunks == 2: - return _chunk_two_parts(text, chunk_size, overlap_size, threshold) - else: - return _chunk_multiple_parts( - text, chunk_size, overlap_size, n_chunks, threshold - ) - except Exception as e: - raise ValueError(f"An error occurred while chunking the text: {e}") - - -def _chunk_two_parts( - text: str, chunk_size: int, overlap_size: int, threshold: int -) -> list[str]: - """Handle chunking for two parts.""" - first_chunk = text[: chunk_size + overlap_size] - if len(text) - chunk_size > threshold: - return [first_chunk, text[chunk_size - overlap_size :]] - return [text] - - -def _chunk_multiple_parts( - text: str, - chunk_size: int, - overlap_size: int, - n_chunks: int, - threshold: int, -) -> list[str]: - """Handle chunking for more than two parts.""" - chunks = [text[: chunk_size + overlap_size]] - - for i in range(1, n_chunks - 1): - start_idx = chunk_size * i - overlap_size - end_idx = chunk_size * (i + 1) + overlap_size - chunks.append(text[start_idx:end_idx]) - - last_chunk_start = chunk_size * (n_chunks - 1) - overlap_size - if len(text) - last_chunk_start > threshold: - chunks.append(text[last_chunk_start:]) - else: - chunks[-1] += text[chunk_size * (n_chunks - 1) + overlap_size :] + if not text: + return [] + + n_chars = len(text) + n_chunks = math.ceil(n_chars / chunk_size) + overlap_size = int(chunk_size * overlap / 2) + + # Single chunk => no splitting + if n_chunks == 1: + return [(text, 0, n_chars)] + + # multiple chunks + step = chunk_size - overlap_size + chunks: list[tuple[str, int, int]] = [] + + start = 0 + while start < n_chars: + end = start + chunk_size + overlap_size + if end > n_chars: + end = n_chars + chunk_text = text[start:end] + chunks.append((chunk_text, start, end)) + start += step + + # check last chunk's threshold + # If the last chunk is smaller than 'threshold', + # merge it with the previous chunk + if len(chunks) > 1: + last_chunk_text, last_start, last_end = chunks[-1] + if (last_end - last_start) < threshold: + # merge into second-last chunk + second_last_text, second_last_start, second_last_end = chunks[-2] + merged_text = second_last_text + last_chunk_text + merged = (merged_text, second_last_start, last_end) + chunks[-2] = merged + chunks.pop() # remove last chunk return chunks @@ -91,122 +82,67 @@ def chunk_by_tokens( overlap: float = 0, threshold: int = 128, return_tokens: bool = False, -) -> list[str | list[str]]: +) -> list[tuple[str, int, int]]: """ - Split a list of tokens into chunks of approximately equal size, with optional overlap. + Split a list of tokens into textual chunks with approximate chunk_size, + returning a list of (subtext, start_offset, end_offset). + The offsets here refer to *token-based* indexing for convenience. - This function divides the input tokens into chunks based on the specified - chunk size. It handles different scenarios based on the number of chunks - required and provides options for overlap between chunks. - - Args: - tokens (list[str]): The input list of tokens to be chunked. - chunk_size (int, optional): The target size for each chunk. Defaults to 1024. 
- overlap (float, optional): The fraction of overlap between chunks. Defaults to 0. - threshold (int, optional): The minimum size for the last chunk. Defaults to 128. - return_tokens (bool, optional): If True, return chunks as lists of tokens; - if False, return as joined strings. Defaults to False. + If the last chunk is smaller than `threshold` tokens, merge it + with the previous chunk. Returns: - list[Union[str, list[str]]]: A list of chunked tokens, either as strings or token lists. - - Raises: - ValueError: If an error occurs during the chunking process. - - Examples: - >>> tokens = ["This", "is", "a", "sample", "text", "for", "chunking."] - >>> chunks = chunk_by_tokens(tokens, chunk_size=3, overlap=0.2) - >>> print(chunks) - ['This is a', 'a sample text', 'text for chunking.'] + List[Tuple[str, int, int]]: A list of (chunk_text, start_token_idx, end_token_idx). """ - try: - n_chunks = math.ceil(len(tokens) / chunk_size) - overlap_size = int(overlap * chunk_size / 2) - residue = len(tokens) % chunk_size - - if n_chunks == 1: - return _process_single_chunk(tokens, return_tokens) - elif n_chunks == 2: - return _chunk_token_two_parts( - tokens, - chunk_size, - overlap_size, - threshold, - residue, - return_tokens, - ) + if not tokens: + return [] + + n = len(tokens) + n_chunks = math.ceil(n / chunk_size) + overlap_size = int(chunk_size * overlap / 2) + + if n_chunks == 1: + # Return entire list as single chunk + text_chunk = tokens if return_tokens else " ".join(tokens) + return [(text_chunk, 0, n)] + + step = chunk_size - overlap_size + chunks: list[tuple[str, int, int]] = [] + + start = 0 + while start < n: + end = start + chunk_size + overlap_size + if end > n: + end = n + sub_tokens = tokens[start:end] + if return_tokens: + chunk_text = sub_tokens else: - return _chunk_token_multiple_parts( - tokens, - chunk_size, - overlap_size, - n_chunks, - threshold, - residue, - return_tokens, - ) - except Exception as e: - raise ValueError(f"An error occurred while chunking the tokens: {e}") - - -def _process_single_chunk( - tokens: list[str], return_tokens: bool -) -> list[str | list[str]]: - """Handle processing for a single chunk.""" - return [tokens] if return_tokens else [" ".join(tokens).strip()] - - -def _chunk_token_two_parts( - tokens: list[str], - chunk_size: int, - overlap_size: int, - threshold: int, - residue: int, - return_tokens: bool, -) -> list[str | list[str]]: - """Handle chunking for two parts.""" - chunks = [tokens[: chunk_size + overlap_size]] - if residue > threshold: - chunks.append(tokens[chunk_size - overlap_size :]) - else: - return _process_single_chunk(tokens, return_tokens) - return _format_chunks(chunks, return_tokens) - - -def _chunk_token_multiple_parts( - tokens: list[str], - chunk_size: int, - overlap_size: int, - n_chunks: int, - threshold: int, - residue: int, - return_tokens: bool, -) -> list[str | list[str]]: - """Handle chunking for more than two parts.""" - chunks = [tokens[: chunk_size + overlap_size]] - for i in range(1, n_chunks - 1): - start_idx = chunk_size * i - overlap_size - end_idx = chunk_size * (i + 1) + overlap_size - chunks.append(tokens[start_idx:end_idx]) - - last_chunk_start = chunk_size * (n_chunks - 1) - overlap_size - if len(tokens) - last_chunk_start > threshold: - chunks.append(tokens[last_chunk_start:]) - else: - chunks[-1] += tokens[-residue:] - - return _format_chunks(chunks, return_tokens) + chunk_text = " ".join(sub_tokens) + chunks.append((chunk_text, start, end)) + start += step + + # If last chunk < threshold tokens => merge 
with second-last + if len(chunks) > 1: + last_text, last_start, last_end = chunks[-1] + size_last = last_end - last_start + if size_last < threshold: + # merge + sl_text, sl_start, sl_end = chunks[-2] + if ( + return_tokens + and isinstance(last_text, list) + and isinstance(sl_text, list) + ): + merged_text = sl_text + last_text + else: + merged_text = f"{sl_text} {last_text}".strip() + + merged = (merged_text, sl_start, last_end) + chunks[-2] = merged + chunks.pop() - -def _format_chunks( - chunks: list[list[str]], return_tokens: bool -) -> list[str | list[str]]: - """Format chunks based on the return_tokens flag.""" - return ( - chunks - if return_tokens - else [" ".join(chunk).strip() for chunk in chunks] - ) + return chunks def chunk_content( @@ -216,54 +152,78 @@ def chunk_content( chunk_size: int = 1024, overlap: float = 0, threshold: int = 256, - metadata: dict[str, Any] = {}, + metadata: dict[str, Any] | None = None, return_tokens: bool = False, - **kwargs: Any, -) -> list[dict[str, Any]]: + **tokenizer_kwargs: Any, +) -> list[Chunk]: """ - Split content into chunks and add metadata. - - This function takes a string content, splits it into chunks using the provided - chunking function, and adds metadata to each chunk. + Split content into chunks using either character-based or token-based splitting, + and produce a list of `Chunk` pydantic models, each containing offsets, content, + size, id, total chunks, and any extra metadata. Args: - content (str): The content to be chunked. - chunk_by(str): The method to use for chunking: "chars" or "tokens". - tokenizer (Callable): The function to use for tokenization. defaults to str.split. - chunk_size (int): The target size for each chunk. - overlap (float): The fraction of overlap between chunks. - threshold (int): The minimum size for the last chunk. - metadata (Dict[str, Any]): Metadata to be included with each chunk. - kwargs for tokenizer, if needed. - + content (str): The text to be chunked (if chunk_by='chars') + or the string to tokenize (if chunk_by='tokens'). + chunk_by (Literal['chars','tokens']): 'chars' => chunk by chars, 'tokens' => chunk by tokens + tokenizer (Callable): A callable that takes the text and returns a list of tokens (only used if chunk_by='tokens'). + chunk_size (int): Target size for each chunk (characters or tokens). + overlap (float): Fraction of overlap (0 <= overlap < 1). + threshold (int): Minimum size for the last chunk. If smaller, merges with previous chunk. + metadata (Dict[str,Any]|None): Additional metadata to attach to each chunk. + return_tokens (bool): If chunk_by='tokens', return chunk as tokens or as a joined string. + **tokenizer_kwargs: Extra args for your tokenizer function. Returns: - List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata. + List[Chunk]: A list of chunk objects with offsets, chunk content, total chunks, etc. 
+ + Example: + >>> text = \"This is a sample text for chunking.\" + >>> # chunk by chars + >>> chunk_list = chunk_content(text, chunk_by='chars', chunk_size=10, overlap=0.1) + >>> # chunk by tokens + >>> chunk_list = chunk_content(text, chunk_by='tokens', chunk_size=5, overlap=0.2) """ - + metadata = metadata or {} if chunk_by == "tokens": - chunks = chunk_by_tokens( - tokens=tokenizer(content, **kwargs), + # convert content to tokens + tokens = tokenizer(content, **tokenizer_kwargs) + subchunks = chunk_by_tokens( + tokens=tokens, chunk_size=chunk_size, overlap=overlap, threshold=threshold, return_tokens=return_tokens, ) else: - chunks = chunk_by_chars( + # chunk by chars + subchunks = chunk_by_chars( text=content, chunk_size=chunk_size, overlap=overlap, threshold=threshold, ) - return [ - { - "chunk_content": chunk, - "chunk_id": i + 1, - "total_chunks": len(chunks), - "chunk_size": len(chunk), - **metadata, - } - for i, chunk in enumerate(chunks) - ] + total = len(subchunks) + chunks_list: list[Chunk] = [] + + for i, (chunk_text, start_off, end_off) in enumerate(subchunks): + c = Chunk( + start_offset=start_off, + end_offset=end_off, + chunk_content=( + chunk_text + if isinstance(chunk_text, str) + else " ".join(chunk_text) + ), + chunk_size=( + len(chunk_text) + if isinstance(chunk_text, str) + else sum(len(t) for t in chunk_text) + ), + chunk_id=i + 1, + total_chunks=total, + metadata=metadata, + ) + chunks_list.append(c) + + return chunks_list diff --git a/lionagi/libs/file/process.py b/lionagi/libs/file/process.py index 029a73e13..a6c3e4bf2 100644 --- a/lionagi/libs/file/process.py +++ b/lionagi/libs/file/process.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any -from .chunk import chunk_content +from .chunk import Chunk, chunk_content from .save import save_chunks @@ -91,7 +91,7 @@ def file_to_chunks( verbose: bool = False, timestamp: bool = True, random_hash_digits: int = 4, -) -> list[dict[str, Any]]: +) -> list[Chunk]: """ Process a file and split its content into chunks. diff --git a/lionagi/libs/file/save.py b/lionagi/libs/file/save.py index 1c6f82fe1..9f8eb2b64 100644 --- a/lionagi/libs/file/save.py +++ b/lionagi/libs/file/save.py @@ -9,6 +9,8 @@ from lionagi.utils import create_path +from .chunk import Chunk + def save_to_file( text: str, @@ -70,7 +72,7 @@ def save_to_file( def save_chunks( - chunks: list[dict[str, Any]], + chunks: list[Chunk], output_dir: str | Path, verbose: bool, timestamp: bool, @@ -87,7 +89,7 @@ def save_chunks( random_hash_digits=random_hash_digits, ) save_to_file( - json.dumps(chunk, ensure_ascii=False, indent=2), + json.dumps(chunk.model_dump(), ensure_ascii=False, indent=2), directory=file_path.parent, filename=file_path.name, verbose=verbose, diff --git a/lionagi/libs/package/imports.py b/lionagi/libs/package/imports.py index 9f18f66c2..ad4cd946b 100644 --- a/lionagi/libs/package/imports.py +++ b/lionagi/libs/package/imports.py @@ -14,7 +14,7 @@ def check_import( package_name: str, module_name: str | None = None, - import_name: str | None = None, + import_name: str | list | None = None, pip_name: str | None = None, attempt_install: bool = True, error_message: str = "", @@ -25,7 +25,7 @@ def check_import( Args: package_name: The name of the package to check. module_name: The specific module to import (if any). - import_name: The specific name to import from the module (if any). + import_name: The specific name(s) to import from the module (if any). pip_name: The name to use for pip installation (if different). 
attempt_install: Whether to attempt installation if not found. error_message: Custom error message to use if package not found. diff --git a/lionagi/operations/ReAct/ReAct.py b/lionagi/operations/ReAct/ReAct.py index 7524ddc1c..cbacd33a1 100644 --- a/lionagi/operations/ReAct/ReAct.py +++ b/lionagi/operations/ReAct/ReAct.py @@ -55,6 +55,9 @@ async def ReAct( **(interpret_kwargs or {}), ) + if verbose_analysis: + print(f"Interpreted instruction: {instruction_str}") + # Convert Instruct to dict if necessary instruct_dict = ( instruct.to_dict() @@ -92,9 +95,11 @@ async def ReAct( ) # Validate and clamp max_extensions if needed - if max_extensions and max_extensions > 5: - logging.warning("max_extensions should not exceed 5; defaulting to 5.") - max_extensions = 5 + if max_extensions and max_extensions > 100: + logging.warning( + "max_extensions should not exceed 100; defaulting to 100." + ) + max_extensions = 100 # Step 2: Possibly loop through expansions if extension_needed extensions = max_extensions diff --git a/lionagi/operations/ReAct/utils.py b/lionagi/operations/ReAct/utils.py index 7d5704d44..76a04833a 100644 --- a/lionagi/operations/ReAct/utils.py +++ b/lionagi/operations/ReAct/utils.py @@ -4,7 +4,13 @@ from typing import ClassVar, Literal -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator + +from lionagi.libs.validate.common_field_validators import ( + validate_boolean_field, +) +from lionagi.libs.validate.fuzzy_validate_mapping import fuzzy_validate_mapping +from lionagi.utils import to_num class PlannedAction(BaseModel): @@ -13,15 +19,24 @@ class PlannedAction(BaseModel): The model can hold multiple actions in a single round if needed. """ - action_type: str = Field( - ..., - description="The name or type of tool/action to invoke (e.g., 'search_exa', 'reader_tool').", + action_type: str | None = Field( + default_factory=str, + description=( + "The name or type of tool/action to invoke (e.g., 'search_exa'," + " 'reader_tool') tool must exist in tool schemas." + ), ) - description: str = Field( - ..., + description: str | None = Field( + default_factory=str, description="A short explanation of why or what is intended to achieve with this action.", ) + @field_validator("action_type", "description", mode="before") + def _validate_action_type(cls, values): + if values is None: + return None + return str(values) + class ReActAnalysis(BaseModel): """ @@ -47,12 +62,12 @@ class ReActAnalysis(BaseModel): "to the user's request:\n\n{instruction}" ) - analysis: str = Field( - ..., - description="Free-form reasoning or chain-of-thought summary. Must be consistent with the plan.", + analysis: str | None = Field( + None, + description="Free-form reflective reasoning/chain-of-thought. Must be consistent with the plan.", ) - planned_actions: list[PlannedAction] = Field( + planned_actions: list[PlannedAction] | None = Field( default_factory=list, description=( "One or more short descriptors of the tool calls or operations " @@ -61,7 +76,7 @@ class ReActAnalysis(BaseModel): ), ) - extension_needed: bool = Field( + extension_needed: bool | None = Field( False, description="Set True if more expansions are needed. If False, final answer is next.", ) @@ -81,12 +96,56 @@ class ReActAnalysis(BaseModel): "'sequential' => Each action is run in order, \n" "'concurrent' => All actions run in parallel, \n" "'batch' => Divide actions into async batches of N (if reasonable)." + "typically only run in sequence if actions depend on each other (side effects)." 
+ "if the actions are independent, run in parallel or batch for speed." ), ) action_batch_size: int | None = Field( None, description=( - "provide if and only if action_strategy is 'batch', this specifies the number of actions to run in parallel per batch." + "provide if and only if action_strategy is 'batch', this specifies " + "the number of actions to run in parallel per batch." ), ) + + @field_validator("extension_needed", mode="before") + def _validate_extension_needed(cls, values): + return validate_boolean_field(cls, values, False) + + @field_validator("action_batch_size", mode="before") + def _validate_action_batch_size(cls, values): + if values is None: + return None + try: + return to_num(values, num_type=int) + except ValueError: + return None + + @field_validator("action_strategy", mode="before") + def _validate_action_strategy(cls, values): + if values not in ["sequential", "concurrent", "batch"]: + return "concurrent" + return values + + @field_validator("planned_actions", mode="before") + def _validate_planned_actions(cls, values): + if not values: + return None + values = [] if not isinstance(values, list) else values + + out = [] + for i in values: + j = fuzzy_validate_mapping( + i, + ["action_type", "description"], + suppress_conversion_errors=True, + ) + if j: + out.append( + PlannedAction( + action_type=j["action_type"], + description=j["description"], + ) + ) + return out diff --git a/lionagi/operations/chat/chat.py b/lionagi/operations/chat/chat.py index 69dd4c19c..7594a0372 100644 --- a/lionagi/operations/chat/chat.py +++ b/lionagi/operations/chat/chat.py @@ -9,8 +9,10 @@ from lionagi.protocols.types import ( ActionResponse, AssistantResponse, + IDType, Instruction, Log, + Progression, RoledMessage, ) from lionagi.service.imodel import iModel @@ -20,6 +22,26 @@ from lionagi.session.branch import Branch +class OperationRequest(BaseModel): ... + + +class ChatRequest(OperationRequest): + + instruction: str | None = None + guidance: str | None = None + context: str | list[str] | None + request_fields: dict | None = None + response_format: type[BaseModel] = None + progression: Progression | None | list[IDType] = None + imodel: iModel | None = None + tool_schemas: list[dict] | dict | None = None + images: list = None + image_detail: Literal["low", "high", "auto"] = None + plain_content: str = None + + ... + + async def chat( branch: "Branch", instruction=None, diff --git a/lionagi/operations/instruct/instruct.py b/lionagi/operations/instruct/instruct.py index f166de839..cf8b46c0c 100644 --- a/lionagi/operations/instruct/instruct.py +++ b/lionagi/operations/instruct/instruct.py @@ -20,10 +20,10 @@ async def instruct( **(instruct.to_dict() if isinstance(instruct, Instruct) else instruct), **kwargs, } - if any(i in config and config[i] for i in Instruct.reserved_kwargs): + if any(i in config and config[i] for i in Instruct.react_kwargs): if "response_format" in config or "request_model" in config: return await branch.operate(**config) - for i in Instruct.reserved_kwargs: + for i in Instruct.react_kwargs: config.pop(i, None) return await branch.communicate(**config) diff --git a/lionagi/operations/interpret/interpret.py b/lionagi/operations/interpret/interpret.py index 086d5b7ec..fec353ae8 100644 --- a/lionagi/operations/interpret/interpret.py +++ b/lionagi/operations/interpret/interpret.py @@ -17,41 +17,7 @@ async def interpret( **kwargs, ) -> str: instruction = """ -You are given a user's raw instruction or question. 
Your task is to rewrite it into a clearer, more structured prompt for an LLM or system, making any implicit or missing details explicit. - -Follow these guidelines: - -1. **Dissect the user's request**: - - If the user references a local file, note it clearly (e.g., "paper_file_path": "…"). - - If the user might need external references or up-to-date data, mention that possibility. - - If the user's question is ambiguous, propose clarifications. - -2. **Be explicit about the user's final objective**: - - For example, if the user wants a comparison with other works, add that as a bullet point or sub-question. - - If the user wants a summary plus code snippet, highlight that in your structured prompt. - -3. **Do NOT produce final system actions**: - - You're not calling any tools directly here; only rewriting the user query to reflect potential next steps. - - If the user's request might require searching or doc reading, note it as an *option*, e.g. "Potential tool usage: {search, partial doc read}." - -4. **Return only the improved user prompt**: - - The final output should be a single text block or short JSON specifying the clarified user request. - - Keep it concise yet thorough. - -For instance, if the user's original text is: -"Please read my local PDF on RL and compare it to the newest research methods from exa or perplexity." - -A re-written version might be: -"**Task**: -- Summarize the local PDF (paper_file_path: 'myRLpaper.pdf'). -- Compare its approach with recent reinforcement learning research found via exa/perplexity searches. -**Potential Tool Usage**: -- Doc reading (reader_tool) -- External search (search_exa, search_perplexity) -**Output**: -- A structured summary + comparative analysis." - -Now, apply this rewriting to the input below. Return only the re-written prompt. +You are given a user's raw instruction or question. Your task is to rewrite it into a clearer, more structured prompt for an LLM or system, making any implicit or missing details explicit. Return only the re-written prompt. """ guidance = ( f"Domain hint: {domain or 'general'}. " diff --git a/lionagi/operations/parse/parse.py b/lionagi/operations/parse/parse.py index 729e23136..d85d8f3e8 100644 --- a/lionagi/operations/parse/parse.py +++ b/lionagi/operations/parse/parse.py @@ -73,7 +73,10 @@ async def parse( strict=strict, suppress_conversion_errors=suppress_conversion_errors, ) - response_model = request_type.model_validate(response_model) + try: + response_model = request_type.model_validate(response_model) + except Exception: + response_model = None if not isinstance(response_model, BaseModel): match handle_validation: diff --git a/lionagi/operatives/instruct/instruct.py b/lionagi/operatives/instruct/instruct.py index 15fab8d09..329180003 100644 --- a/lionagi/operatives/instruct/instruct.py +++ b/lionagi/operatives/instruct/instruct.py @@ -22,7 +22,7 @@ ) -class Instruct(HashableModel): +class ChatInstruct(HashableModel): """Model for defining instruction parameters and execution requirements. Attributes: @@ -31,17 +31,6 @@ class Instruct(HashableModel): context (JsonValue | None): Task context. """ - reserved_kwargs: ClassVar[list[str]] = [ - "operative_model", - "field_models", - "operative", - "reason", - "actions", - "action_strategy", - "batch_size", - "request_params", - "response_params", - ] instruction: JsonValue | None = Field( None, title="Primary Instruction", @@ -82,6 +71,14 @@ class Instruct(HashableModel): "Use None if no additional context is needed." 
), ) + + @field_validator("instruction", "guidance", "context", mode="before") + def _validate_instruction(cls, v): + return validate_nullable_jsonvalue_field(cls, v) + + +class ReActInstruct(HashableModel): + reason: bool | None = Field( None, description=( @@ -111,10 +108,10 @@ class Instruct(HashableModel): None, description="Batch size for executing actions. Only provide for 'batch' strategy.", ) - - @field_validator("instruction", "guidance", "context", mode="before") - def _validate_instruction(cls, v): - return validate_nullable_jsonvalue_field(cls, v) + allowed_actions: list[str] | None = Field( + default_factory=list, + description="List of allowed actions for the current instruction.", + ) @field_validator("reason", "actions", mode="before") def _validate_reason(cls, v): @@ -132,22 +129,3 @@ def _validate_batch_size(cls, v): return to_num(v, num_type=int) except Exception: return None - - -INSTRUCT_FIELD = FieldModel( - name="instruct_model", - annotation=Instruct | None, - default=None, -) - - -class InstructResponse(HashableModel): - instruct: Instruct - response: Any | None = None - - -LIST_INSTRUCT_FIELD_MODEL = FieldModel( - name="instruct_models", - annotation=list[Instruct] | None, - default=None, -) diff --git a/lionagi/operatives/instruct/models.py b/lionagi/operatives/instruct/models.py new file mode 100644 index 000000000..f715aa05d --- /dev/null +++ b/lionagi/operatives/instruct/models.py @@ -0,0 +1,165 @@ +from typing import Any, ClassVar, Literal + +from pydantic import BaseModel, Field, JsonValue, field_validator + +from lionagi.libs.validate.common_field_validators import ( + validate_boolean_field, + validate_nullable_jsonvalue_field, +) +from lionagi.utils import HashableModel, to_num + +from ..models.field_model import FieldModel + + +class ChatInstruct(HashableModel): + """Model for defining instruction parameters and execution requirements. + + Attributes: + instruction (JsonValue | None): The primary instruction. + guidance (JsonValue | None): Execution guidance. + context (JsonValue | None): Task context. + """ + + instruction: JsonValue | None = Field( + None, + title="Primary Instruction", + description=( + "A clear, actionable task definition. Specify:\n" + "1) The primary goal or objective\n" + "2) Key success criteria or constraints\n" + "\n" + "Guidelines:\n" + "- Start with a direct action verb (e.g., 'Analyze', 'Generate', 'Create')\n" + "- Include scope, boundaries, or constraints\n" + "- Provide success criteria if relevant\n" + "- For complex tasks, break them into logical steps" + ), + ) + guidance: JsonValue | None = Field( + None, + title="Guidance", + description=( + "Strategic direction and constraints for executing the task. " + "Include:\n" + "1) Preferred methods or frameworks\n" + "2) Quality benchmarks (e.g., speed, clarity)\n" + "3) Resource or environmental constraints\n" + "4) Relevant compliance or standards\n" + "Use None if no special guidance." + ), + ) + context: JsonValue | None = Field( + None, + description=( + "Background information and current-state data needed for the task. " + "Should be:\n" + "1) Directly relevant\n" + "2) Sufficient to perform the task\n" + "3) Free of extraneous detail\n" + "Include environment, prior outcomes, system states, or dependencies. " + "Use None if no additional context is needed." 
+        ),
+    )
+
+    @field_validator("instruction", "guidance", "context", mode="before")
+    def _validate_instruction(cls, v):
+        return validate_nullable_jsonvalue_field(cls, v)
+
+
+class ReActInstruct(BaseModel):
+
+    reason: bool | None = Field(
+        None,
+        description=(
+            "Include a thoughtful explanation of decisions, trade-offs, "
+            "and insights. Encourage deeper introspection on why certain "
+            "choices were made, potential alternatives, and how confidence "
+            "was shaped. If not needed, set to None."
+        ),
+    )
+    actions: bool | None = Field(
+        None,
+        description=(
+            "Controls execution mode. "
+            "True: Execute specified actions. "
+            "False: Analysis/recommendations only. "
+            "None: Contextual execution."
+        ),
+    )
+    action_strategy: Literal["batch", "sequential", "concurrent"] | None = (
+        Field(
+            None,
+            description="Action strategy to use for executing actions. Default "
+            "is 'concurrent'. Only provide if actions are enabled.",
+        )
+    )
+    action_batch_size: int | None = Field(
+        None,
+        description="Batch size for executing actions. Only provide for 'batch' strategy.",
+    )
+    allowed_actions: list[str] | None = Field(
+        default_factory=list,
+        description="List of allowed actions for the current instruction.",
+    )
+
+    @field_validator("reason", "actions", mode="before")
+    def _validate_reason(cls, v):
+        return validate_boolean_field(cls, v)
+
+    @field_validator("action_strategy", mode="before")
+    def _validate_action_strategy(cls, v):
+        if v not in ["batch", "sequential", "concurrent"]:
+            return "concurrent"
+        return v
+
+    @field_validator("action_batch_size", mode="before")
+    def _validate_batch_size(cls, v):
+        try:
+            return to_num(v, num_type=int)
+        except Exception:
+            return None
+
+
+class Instruct(ChatInstruct):
+
+    react_kwargs: ClassVar[list[str]] = [
+        "field_models",
+        "operative",
+        "reason",
+        "actions",
+        "action_strategy",
+        "batch_size",
+        "request_params",
+        "response_params",
+    ]
+
+    react_options: ReActInstruct | None = Field(
+        None,
+        description="ReAct (reason + action)-specific instruction options.",
+    )
+
+    def to_dict(self):
+        dict_ = self.model_dump()
+        if self.react_options:
+            dict_.update(self.react_options.model_dump())
+            dict_.pop("react_options", None)
+        return {k: v for k, v in dict_.items() if v is not None}
+
+
+INSTRUCT_FIELD = FieldModel(
+    name="instruct_model",
+    annotation=Instruct | None,
+    default=None,
+)
+
+
+class InstructResponse(HashableModel):
+    instruct: Instruct
+    response: Any | None = None
+
+
+LIST_INSTRUCT_FIELD_MODEL = FieldModel(
+    name="instruct_models",
+    annotation=list[Instruct] | None,
+    default=None,
+)
diff --git a/lionagi/service/endpoints/chat_completion.py b/lionagi/service/endpoints/chat_completion.py
index 194a25fc1..008e3fb00 100644
--- a/lionagi/service/endpoints/chat_completion.py
+++ b/lionagi/service/endpoints/chat_completion.py
@@ -27,10 +27,15 @@ async def _invoke(
         headers: dict,
         **kwargs,
     ):
-        import litellm
+        from lionagi.libs.package.imports import check_import
+
+        check_import("litellm")
+
+        import litellm  # type: ignore
 
         litellm.drop_params = True
-        from litellm import acompletion
+
+        from litellm import acompletion  # type: ignore
 
         provider = self.config.provider
 
@@ -64,10 +69,14 @@ async def _stream(
         headers: dict,
         **kwargs,
     ) -> AsyncGenerator:
-        import litellm
+        from lionagi.libs.package.imports import check_import
+
+        check_import("litellm")
+
+        import litellm  # type: ignore
 
         litellm.drop_params = True
-        from litellm import acompletion
+        from litellm import acompletion  # type: ignore
 
         provider = self.config.provider
 
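The new `Instruct` model in models.py above keeps chat-level fields on `ChatInstruct` and moves ReAct-specific options into a nested `ReActInstruct`, flattening them back out in `to_dict()`. A minimal sketch of the resulting behavior (field values are hypothetical; module path per this diff):

```python
# Sketch: how Instruct.to_dict() flattens react_options (per models.py above).
from lionagi.operatives.instruct.models import Instruct, ReActInstruct

ins = Instruct(
    instruction="Summarize the attached report",
    react_options=ReActInstruct(actions=True, action_strategy="concurrent"),
)

# model_dump() nests react_options; to_dict() merges those keys into the top
# level, drops the "react_options" key, and filters out None values, e.g.:
# {"instruction": "Summarize the attached report", "actions": True,
#  "action_strategy": "concurrent", "allowed_actions": []}
print(ins.to_dict())
```
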
diff --git a/lionagi/service/imodel.py b/lionagi/service/imodel.py
index bdd53e128..cbda7a2db 100644
--- a/lionagi/service/imodel.py
+++ b/lionagi/service/imodel.py
@@ -2,11 +2,14 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import asyncio
 import os
 import warnings
 
 from pydantic import BaseModel
 
+from lionagi.protocols.generic.event import EventStatus
+
 from .endpoints.base import APICalling, EndPoint
 from .endpoints.match_endpoint import match_endpoint
 from .endpoints.rate_limited_processor import RateLimitedAPIExecutor
@@ -54,7 +57,7 @@ def __init__(
         interval: float | None = None,
         limit_requests: int = None,
         limit_tokens: int = None,
-        invoke_with_endpoint: bool = True,
+        invoke_with_endpoint: bool = False,
         **kwargs,
     ) -> None:
         """Initializes the iModel instance.
@@ -232,8 +235,17 @@ async def invoke(
 
             await self.executor.append(api_call)
             await self.executor.forward()
-            if api_call.id in self.executor.completed_events:
-                return self.executor.pile.pop(api_call.id)
+
+            ctr = 0
+            while api_call.status not in [
+                EventStatus.COMPLETED,
+                EventStatus.FAILED,
+            ]:
+                ctr += 1
+                await asyncio.sleep(0.1)
+                if ctr > 100:
+                    break
+            return self.executor.pile.pop(api_call.id)
 
         except Exception as e:
             raise ValueError(f"Failed to invoke API call: {e}")
diff --git a/lionagi/service/providers/exa_/models.py b/lionagi/service/providers/exa_/models.py
index dfedf0f21..c7ef9c5cd 100644
--- a/lionagi/service/providers/exa_/models.py
+++ b/lionagi/service/providers/exa_/models.py
@@ -103,9 +103,9 @@ class Contents(BaseModel):
 
 
 class ExaSearchRequest(BaseModel):
-    query: str = Field(
-        ...,
-        description="The main query string describing what you're looking for.",
+    query: str | None = Field(
+        default_factory=str,
+        description="**required** The natural-language query string describing what you're looking for.",
     )
     category: None | CategoryEnum = Field(
         default=None,
diff --git a/lionagi/session/branch.py b/lionagi/session/branch.py
index 781a55b05..3aff35b3f 100644
--- a/lionagi/session/branch.py
+++ b/lionagi/session/branch.py
@@ -1363,7 +1363,7 @@ async def _batch_act(
         **kwargs,
     ) -> list:
         result = []
-        async for i in await bcall(
+        async for i in bcall(
             action_request, self._act, batch_size=batch_size, **kwargs
         ):
             result.extend(i)
diff --git a/lionagi/tools/browser/models.py b/lionagi/tools/browser/models.py
new file mode 100644
index 000000000..720be7a35
--- /dev/null
+++ b/lionagi/tools/browser/models.py
@@ -0,0 +1,74 @@
+from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class BrowserAction(str, Enum):
+    """
+    Basic web actions:
+    - 'open_url': Retrieve HTML/text from URL
+    - 'download_file': Download from URL -> local path
+    - 'screenshot': (Optional) capture screenshot
+    - 'click_element': (Optional) simulate click by CSS/xpath
+    """
+
+    open_url = "open_url"
+    download_file = "download_file"
+    screenshot = "screenshot"
+    click_element = "click_element"
+
+
+class BrowserRequest(BaseModel):
+    """
+    Request for BrowserTool.
+    """
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "description": (
+                "BrowserTool: Allows basic web interactions, reading content, "
+                "downloading, or simple element clicks."
+            )
+        }
+    )
+
+    action: BrowserAction
+    url: Optional[str] = Field(
+        None,
+        description="For 'open_url','download_file'. The target web address.",
+    )
+    local_path: Optional[str] = Field(
+        None, description="For 'download_file'. Where to save locally."
+ ) + selector: Optional[str] = Field( + None, + description="For 'click_element' or 'screenshot' partial region. A CSS or Xpath expression.", + ) + full_page: bool = Field( + False, + description="For 'screenshot'. If True, capture entire page, else only viewport.", + ) + + +class BrowserResponse(BaseModel): + """ + Response from BrowserTool. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "BrowserTool Response: Contains page content, downloaded file path, screenshot data, etc." + ) + } + ) + + success: bool + error: Optional[str] = Field( + None, description="If success=False, reason for failure." + ) + page_content: Optional[str] = None + saved_path: Optional[str] = None + screenshot_data: Optional[bytes] = None + click_result: Optional[str] = None diff --git a/lionagi/tools/chunker/chunker_tool.py b/lionagi/tools/chunker/chunker_tool.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/chunker/models.py b/lionagi/tools/chunker/models.py new file mode 100644 index 000000000..fd3ad3842 --- /dev/null +++ b/lionagi/tools/chunker/models.py @@ -0,0 +1,106 @@ +from enum import Enum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class ChunkAction(str, Enum): + """ + Actions dedicated to chunking doc text or loading chunks from disk: + + - 'chunk_doc': Create chunk metadata from a doc or file + - 'list_chunks': List chunk metadata for an existing chunk set + - 'read_chunks': Return the text of specific chunk indexes + - 'save_chunks': Save chunk data to disk + - 'load_chunks': Load chunk data from disk into memory + - 'close_chunks': Remove chunk set from memory + """ + + chunk_doc = "chunk_doc" + list_chunks = "list_chunks" + read_chunks = "read_chunks" + save_chunks = "save_chunks" + load_chunks = "load_chunks" + close_chunks = "close_chunks" + + +class ChunkRequest(BaseModel): + """ + Request model for ChunkTool. The LLM picks 'action' + relevant fields. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "ChunkTool: A specialized tool for splitting text into chunks, listing them, reading chunk content, " + "and optionally saving/loading chunk data to/from disk." + ) + } + ) + + action: ChunkAction = Field( + ..., description="Which chunk-based operation to perform." + ) + + doc_id: str | None = Field( + None, + description="If chunking from an in-memory doc, reference doc_id. If None, might chunk from file_path.", + ) + file_path: str | None = Field( + None, + description="If chunking directly from a local file instead of an in-memory doc.", + ) + + chunk_size: int = Field( + 1500, + description=("For 'chunk_doc'. Approx chunk size in chars."), + ) + overlap: float = Field( + 0.1, description="For 'chunk_doc'. Fraction of overlap [0..1)." + ) + threshold: int = Field( + 200, + description=( + "For 'chunk_doc'. Min final chunk size. If smaller, merges with prior chunk." + ), + ) + chunk_indexes: list[int] | None = Field( + None, + description=("For 'read_chunks'. The chunk indexes to retrieve."), + ) + # For saving/loading chunk data + directory: str | None = Field( + None, + description="For 'save_chunks','load_chunks'. The directory path in allowed root.", + ) + filename: str | None = Field( + None, + description="For 'save_chunks','load_chunks'. The chunk data file name.", + ) + chunks: list[Any] | None = Field( + None, + description=( + "For 'save_chunks'. The chunk objects to be written. If None, invalid for that action." 
+ ), + ) + + +class ChunkResponse(BaseModel): + """ + Response from ChunkTool, including chunk metadata or read results. + """ + + success: bool + error: str | None = None + + chunk_list: list[dict] | None = Field( + None, + description="If 'chunk_doc','list_chunks', the chunk metadata objects.", + ) + chunks_read: list[dict] | None = Field( + None, + description="If 'read_chunks', the chunk data for requested indexes.", + ) + saved_path: str | None = Field( + None, description="If 'save_chunks', path to the chunk data file." + ) diff --git a/lionagi/tools/coder/__init__.py b/lionagi/tools/coder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/coder/coder.py b/lionagi/tools/coder/coder.py new file mode 100644 index 000000000..433f52cd2 --- /dev/null +++ b/lionagi/tools/coder/coder.py @@ -0,0 +1,893 @@ +# Copyright (c) 2023 - 2024, HaiyangLi +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os +import subprocess +import uuid +from enum import Enum +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field + +from lionagi.operatives.action.tool import Tool + +from ..base import LionTool + + +def run_command( + command_list: list[str], cwd: Path | None = None +) -> tuple[str, int]: + """ + Runs a shell command and returns (combined_stdout, exit_code). + No user interaction, no streaming. For advanced usage, handle pipes, etc. + """ + proc = subprocess.Popen( + command_list, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + out, _ = proc.communicate() + return out.decode("utf-8", errors="replace"), proc.returncode + + +def truncate_output(text: str, max_len: int = 5000) -> str: + """ + Truncate the output to prevent overly large results. + """ + if len(text) > max_len: + return text[:max_len] + "...(truncated)..." + return text + + +def truncate_display_str(s: str, max_length: int = 30) -> str: + """ + Truncate a string for display if it exceeds max_length. + """ + if len(s) <= max_length: + return s + return s[:max_length] + "..." + + +def format_string_for_display(s: str, threshold: int = 30) -> str: + """ + Format a string for display, showing either the entire string if short + or just a length-based descriptor if long. + """ + if len(s) <= threshold: + return f"'{s}'" + return f"[{len(s)} characters]" + + +# --------------------------- +# CODERTOOL SCHEMA +# --------------------------- + + +class CoderAction(str, Enum): + """ + The enumerated actions that the CoderTool can handle: + - E2B sandbox management: start/stop/list, run code, install pkg, upload/download + - Local file ops: file_str_replace, fuzzy_find, shell_command + """ + + start_sandbox = "start_sandbox" + stop_sandbox = "stop_sandbox" + list_sandboxes = "list_sandboxes" + run_code = "run_code" + install_pkg = "install_pkg" + upload_file = "upload_file" + download_file = "download_file" + + file_str_replace = "file_str_replace" + fuzzy_find = "fuzzy_find" + shell_command = "shell_command" + + +class CoderRequest(BaseModel): + """ + The input model for the CoderTool. Depending on the 'action', different fields + apply. The 'description' of each field is intended as a prompt/hint for the LLM. 
+ """ + + action: CoderAction = Field( + ..., + description=( + "One of the enumerated coder actions:\n" + "- 'start_sandbox': Create a new E2B sandbox.\n" + "- 'stop_sandbox': Stop an existing sandbox.\n" + "- 'list_sandboxes': List all active sandbox IDs.\n" + "- 'run_code': Run code in a specified sandbox.\n" + "- 'install_pkg': Install a package in a sandbox.\n" + "- 'upload_file': Upload a local file to a sandbox.\n" + "- 'download_file': Download a file from a sandbox.\n" + "- 'file_str_replace': Replace an exact string in a local file.\n" + "- 'fuzzy_find': Fuzzy match files in a local Git repo.\n" + "- 'shell_command': Run a local shell command." + ), + ) + + # E2B sandbox fields + sandbox_id: str | None = Field( + None, + description=( + "Unique ID of the sandbox to operate on. Required for most sandbox actions " + "(e.g., 'run_code', 'install_pkg', 'upload_file', 'download_file', 'stop_sandbox')." + ), + ) + code: str | None = Field( + None, + description=( + "The source code to run if action='run_code'. Usually a Python or JS code snippet." + ), + ) + language: str | None = Field( + None, + description=( + "Programming language for 'run_code' (e.g. 'python', 'javascript'). Defaults to 'python'." + ), + ) + pkg_manager: str | None = Field( + None, + description=( + "If action='install_pkg', which package manager is used (e.g., 'pip', 'npm', 'apt', 'uv')." + ), + ) + pkg_name: str | None = Field( + None, + description=( + "If action='install_pkg', name of the package to install (e.g., 'requests')." + ), + ) + local_path: str | None = Field( + None, + description=( + "Local filesystem path used for file operations (upload_file, download_file), " + "or local ops like shell_command." + ), + ) + remote_path: str | None = Field( + None, + description=( + "In-sandbox path for upload_file or download_file (e.g., '/home/user/data.txt')." + ), + ) + template: str | None = Field( + None, + description=( + "If action='start_sandbox', optional E2B template ID (custom environment)." + ), + ) + cpu: int | None = Field( + None, + description=( + "If action='start_sandbox', optional CPU (vCPUs) for the sandbox (if supported)." + ), + ) + ram: int | None = Field( + None, + description=( + "If action='start_sandbox', optional RAM (in MB) for the sandbox (if supported)." + ), + ) + + # file_str_replace + filepath: str | None = Field( + None, + description=( + "If action='file_str_replace', path to the local file to modify." + ), + ) + old_str: str | None = Field( + None, + description=( + "If action='file_str_replace', the exact string to replace (must appear exactly once)." + ), + ) + new_str: str | None = Field( + None, + description=( + "If action='file_str_replace', the new string to insert in place of old_str." + ), + ) + + # fuzzy_find + search_term: str | None = Field( + None, + description=( + "If action='fuzzy_find', substring or partial name to match against local repo files." + ), + ) + repo_path: str = Field( + ".", + description=( + "If action='fuzzy_find', local path to the Git repository (default: current dir)." + ), + ) + threshold: int = Field( + 60, + description=( + "If action='fuzzy_find', minimum fuzzy match score (0-100). Default: 60." + ), + ) + max_results: int = Field( + 10, + description=( + "If action='fuzzy_find', max number of file matches to return. Default: 10." + ), + ) + include_paths: list[str] | None = Field( + None, + description=( + "If action='fuzzy_find', optional list of path patterns to include (e.g., ['*.py', '*.md'])." 
+ ), + ) + exclude_patterns: list[str] | None = Field( + None, + description=( + "If action='fuzzy_find', optional list of path patterns to exclude (in addition to defaults)." + ), + ) + + # shell_command + command: str | None = Field( + None, + description=( + "If action='shell_command', the local command line string to execute (e.g. 'ls -lah')." + ), + ) + + verbose: bool = Field( + False, + description=( + "If true, produce more detailed console output (when a CLI with 'rich' is available)." + ), + ) + + +class RunCodeResult(BaseModel): + """ + Represents the outcome of running code in an E2B sandbox. + + - stdout: The standard output from the code execution + - stderr: The standard error output (if any) + - error: An error message if the code crashed or returned an exception + - result_objects: A list of returned objects (e.g., images, data) from E2B + """ + + stdout: str | None = None + stderr: str | None = None + error: str | None = None + result_objects: list[dict[str, Any]] | None = None + + +class CoderResponse(BaseModel): + """ + The structured response from the CoderTool. + Fields are populated depending on which action was taken. + """ + + success: bool = Field( + ..., + description="Indicates whether the requested action was performed successfully.", + ) + error: str | None = Field( + None, + description="Any error message or reason for failure if success=False.", + ) + + # For sandbox operations + sandbox_id: str | None = Field( + None, description="New or existing sandbox ID if relevant." + ) + sandbox_list: list[str] | None = Field( + None, + description="List of currently active sandbox IDs if action='list_sandboxes'.", + ) + + # For run_code + run_result: RunCodeResult | None = Field( + None, + description="Populated if action='run_code' succeeded, contains stdout/stderr/error.", + ) + + # For file_str_replace + message: str | None = Field( + None, + description="A success or informational message if action='file_str_replace'.", + ) + + # For fuzzy_find + fuzzy_matches: list[tuple[str, int]] | None = Field( + None, + description="List of (file_path, score) for fuzzy-matching if action='fuzzy_find'.", + ) + + # For shell_command + command_output: str | None = Field( + None, description="Truncated stdout of the local shell command." + ) + return_code: int | None = Field( + None, description="Exit code from the local shell command." + ) + + +class CoderTool(LionTool): + """ + Manages E2B sandbox operations (start/stop, run code, install packages, upload/download) + plus local file tasks (file_str_replace, fuzzy_find, shell_command). + Uses your new Pydantic models for request/response. in a Python 3.10 environment with optional 'uv' for dependency management. 
+ """ + + is_lion_system_tool = True + system_tool_name = "coder_tool" + from lionagi.libs.package.imports import check_import + + Sandbox = check_import("e2b_code_interpreter", import_name="Sandbox") + Console = check_import("rich.console", import_name="Console") + Panel = check_import("rich.panel", import_name="Panel") + Markdown = check_import("rich.markdown", import_name="Markdown") + Repo = check_import("git", import_name="Repo") + InvalidGitRepositoryError = check_import( + "git.exc", import_name="InvalidGitRepositoryError" + ) + fuzz_process = check_import("fuzzywuzzy", import_name="process") + + def __init__(self, e2b_api_key: str): + super().__init__() + self.e2b_api_key = e2b_api_key + self.sandboxes = {} + self.console = CoderTool.Console() + self._tool = None + + def handle_request(self, request: CoderRequest) -> CoderResponse: + if isinstance(request, dict): + request = CoderRequest(**request) + + action = request.action + verbose = request.verbose + + # ---- E2B sandbox actions ---- + if action == CoderAction.start_sandbox: + return self._start_sandbox( + request.template, request.cpu, request.ram, verbose + ) + + elif action == CoderAction.stop_sandbox: + if not request.sandbox_id: + return CoderResponse( + success=False, + error="sandbox_id required for 'stop_sandbox'", + ) + return self._stop_sandbox(request.sandbox_id, verbose) + + elif action == CoderAction.list_sandboxes: + return self._list_sandboxes() + + elif action == CoderAction.run_code: + if not request.sandbox_id: + return CoderResponse( + success=False, error="sandbox_id required for 'run_code'" + ) + if not request.code: + return CoderResponse( + success=False, error="code is required for 'run_code'" + ) + return self._run_code( + request.sandbox_id, request.code, request.language, verbose + ) + + elif action == CoderAction.install_pkg: + if not request.sandbox_id: + return CoderResponse( + success=False, + error="sandbox_id required for 'install_pkg'", + ) + if not request.pkg_manager or not request.pkg_name: + return CoderResponse( + success=False, + error="pkg_manager and pkg_name are required", + ) + return self._install_pkg( + request.sandbox_id, + request.pkg_manager, + request.pkg_name, + verbose, + ) + + elif action == CoderAction.upload_file: + if not request.sandbox_id: + return CoderResponse( + success=False, + error="sandbox_id required for 'upload_file'", + ) + if not request.local_path or not request.remote_path: + return CoderResponse( + success=False, + error="local_path and remote_path are required", + ) + return self._upload_file( + request.sandbox_id, + request.local_path, + request.remote_path, + verbose, + ) + + elif action == CoderAction.download_file: + if not request.sandbox_id: + return CoderResponse( + success=False, + error="sandbox_id required for 'download_file'", + ) + if not request.local_path or not request.remote_path: + return CoderResponse( + success=False, + error="local_path and remote_path are required", + ) + return self._download_file( + request.sandbox_id, + request.local_path, + request.remote_path, + verbose, + ) + + # ---- Local file ops ---- + elif action == CoderAction.file_str_replace: + if ( + not request.filepath + or not request.old_str + or not request.new_str + ): + return CoderResponse( + success=False, + error="filepath, old_str, new_str are required", + ) + return self._file_str_replace( + request.filepath, request.old_str, request.new_str, verbose + ) + + elif action == CoderAction.fuzzy_find: + if not request.search_term: + return CoderResponse( + 
success=False, + error="search_term is required for 'fuzzy_find'", + ) + return self._fuzzy_find_project_files( + request.search_term, + request.repo_path, + request.threshold, + request.max_results, + request.include_paths, + request.exclude_patterns, + verbose, + ) + + elif action == CoderAction.shell_command: + if not request.command: + return CoderResponse( + success=False, + error="command is required for 'shell_command'", + ) + return self._run_shell_command(request.command, verbose) + + # unknown action + return CoderResponse(success=False, error="Unknown action type") + + # ----------------------------------------------------- + # E2B sandbox logic + # ----------------------------------------------------- + + def _start_sandbox( + self, + template: str | None, + cpu: int | None, + ram: int | None, + verbose: bool, + ) -> CoderResponse: + try: + sbx_id = f"sandbox_{uuid.uuid4().hex[:8]}" + if template: + sbx = CoderTool.Sandbox( + api_key=self.e2b_api_key, template=template + ) + else: + sbx = CoderTool.Sandbox(api_key=self.e2b_api_key) + + sbx.start() + self.sandboxes[sbx_id] = sbx + + if verbose and self.console: + self.console.print( + f"[green bold]Started sandbox:[/green bold] {sbx_id}" + ) + + return CoderResponse(success=True, sandbox_id=sbx_id) + except Exception as e: + logging.error(f"Failed to start sandbox: {e}") + return CoderResponse(success=False, error=str(e)) + + def _stop_sandbox(self, sandbox_id: str, verbose: bool) -> CoderResponse: + sbx = self.sandboxes.get(sandbox_id) + if not sbx: + return CoderResponse( + success=False, error=f"Sandbox not found: {sandbox_id}" + ) + + try: + sbx.kill() + except Exception as e: + logging.warning(f"Error stopping sandbox {sandbox_id}: {e}") + + del self.sandboxes[sandbox_id] + if verbose and self.console: + self.console.print( + f"[yellow bold]Stopped sandbox:[/yellow bold] {sandbox_id}" + ) + + return CoderResponse(success=True, sandbox_id=sandbox_id) + + def _list_sandboxes(self) -> CoderResponse: + return CoderResponse( + success=True, sandbox_list=list(self.sandboxes.keys()) + ) + + def _run_code( + self, + sandbox_id: str, + code: str, + language: str | None, + verbose: bool, + ) -> CoderResponse: + sbx = self.sandboxes.get(sandbox_id) + if not sbx: + return CoderResponse( + success=False, error=f"Sandbox not found: {sandbox_id}" + ) + + lang = language or "python" + try: + exec_result = sbx.run_code( + code, + language=lang, + on_stderr=lambda stderr: logging.info( + f"[{sandbox_id} stderr] {stderr}" + ), + on_stdout=lambda stdout: logging.info( + f"[{sandbox_id} stdout] {stdout}" + ), + ) + error_msg = exec_result.error.value if exec_result.error else None + + result_objs = [] + if exec_result.results: + for obj in exec_result.results: + result_objs.append( + obj.model_dump() if hasattr(obj, "model_dump") else {} + ) + + run_result = RunCodeResult( + stdout=exec_result.stdout, + stderr=exec_result.stderr, + error=error_msg, + result_objects=result_objs, + ) + + if verbose and self.console: + self.console.print( + f"[bold green]Code executed in sandbox {sandbox_id}[/bold green] (lang={lang})" + ) + + return CoderResponse(success=True, run_result=run_result) + except Exception as ex: + logging.error(f"Error running code in {sandbox_id}: {ex}") + return CoderResponse(success=False, error=str(ex)) + + def _install_pkg( + self, sandbox_id: str, manager: str, pkg_name: str, verbose: bool + ) -> CoderResponse: + sbx = self.sandboxes.get(sandbox_id) + if not sbx: + return CoderResponse( + success=False, error=f"Sandbox not 
found: {sandbox_id}"
+            )
+
+        try:
+            if manager == "pip":
+                cmd = f"pip install {pkg_name}"
+            elif manager == "npm":
+                cmd = f"npm install {pkg_name}"
+            elif manager == "apt":
+                cmd = f"apt-get update && apt-get install -y {pkg_name}"
+            elif manager == "uv":
+                cmd = f"uv pip install {pkg_name}"
+            else:
+                return CoderResponse(
+                    success=False, error=f"Unsupported pkg_manager '{manager}'"
+                )
+
+            result = sbx.commands.run(cmd)
+            if result.exit_code != 0:
+                return CoderResponse(
+                    success=False,
+                    error=result.stderr
+                    or f"Install failed: {pkg_name} with {manager}",
+                )
+
+            if verbose and self.console:
+                self.console.print(
+                    f"[bold green]Installed '{pkg_name}' via {manager}[/bold green] in sandbox {sandbox_id}"
+                )
+
+            return CoderResponse(success=True)
+        except Exception as ex:
+            logging.error(f"Error installing {pkg_name} with {manager}: {ex}")
+            return CoderResponse(success=False, error=str(ex))
+
+    def _upload_file(
+        self, sandbox_id: str, local_path: str, remote_path: str, verbose: bool
+    ) -> CoderResponse:
+        sbx = self.sandboxes.get(sandbox_id)
+        if not sbx:
+            return CoderResponse(
+                success=False, error=f"Sandbox not found: {sandbox_id}"
+            )
+
+        if not os.path.exists(local_path):
+            return CoderResponse(
+                success=False, error=f"Local file not found: {local_path}"
+            )
+
+        try:
+            with open(local_path, "rb") as f:
+                file_bytes = f.read()
+
+            res = sbx.files.write(remote_path, file_bytes)
+            if not res:
+                return CoderResponse(
+                    success=False, error="Upload returned None or failed."
+                )
+
+            if verbose and self.console:
+                self.console.print(
+                    f"[cyan bold]Uploaded[/cyan bold] {local_path} to sandbox:{remote_path}"
+                )
+
+            return CoderResponse(success=True)
+        except Exception as ex:
+            logging.error(f"Error uploading file: {ex}")
+            return CoderResponse(success=False, error=str(ex))
+
+    def _download_file(
+        self, sandbox_id: str, local_path: str, remote_path: str, verbose: bool
+    ) -> CoderResponse:
+        sbx = self.sandboxes.get(sandbox_id)
+        if not sbx:
+            return CoderResponse(
+                success=False, error=f"Sandbox not found: {sandbox_id}"
+            )
+
+        try:
+            content = sbx.files.read(remote_path)
+            if content is None:
+                return CoderResponse(
+                    success=False,
+                    error=f"Could not read remote file: {remote_path}",
+                )
+
+            with open(local_path, "wb") as f:
+                if isinstance(content, bytes):
+                    f.write(content)
+                else:
+                    f.write(content.encode("utf-8"))
+
+            if verbose and self.console:
+                self.console.print(
+                    f"[cyan bold]Downloaded[/cyan bold] sandbox:{remote_path} to {local_path}"
+                )
+
+            return CoderResponse(success=True)
+        except Exception as ex:
+            logging.error(f"Error downloading file: {ex}")
+            return CoderResponse(success=False, error=str(ex))
+
+    # -----------------------------------------------------
+    # Local File Ops
+    # -----------------------------------------------------
+
+    def _file_str_replace(
+        self, filepath: str, old_str: str, new_str: str, verbose: bool
+    ) -> CoderResponse:
+        path = Path(filepath)
+        if not path.exists():
+            msg = f"File not found: {filepath}"
+            if verbose and self.console:
+                self.console.print(f"[red]{msg}[/red]")
+            return CoderResponse(success=False, error=msg)
+
+        content = path.read_text()
+        count = content.count(old_str)
+        if count == 0:
+            msg = f"String not found: {truncate_display_str(old_str)}"
+            if verbose and self.console:
+                self.console.print(f"[red]{msg}[/red]")
+            return CoderResponse(success=False, error=msg)
+        elif count > 1:
+            msg = f"String appears {count} times - must be unique"
+            if verbose and self.console:
+                self.console.print(f"[red]{msg}[/red]")
+            return CoderResponse(success=False, error=msg)
+
+        new_content = content.replace(old_str, new_str)
+        path.write_text(new_content)
+
+        if verbose:
+            replaced_msg = (
+                f"Replaced in {filepath}:\n"
+                f"{format_string_for_display(old_str)} → {format_string_for_display(new_str)}"
+            )
+            self.console.print(
+                CoderTool.Panel(
+                    replaced_msg,
+                    title="✓ String Replaced",
+                    border_style="bright_blue",
+                )
+            )
+
+        return CoderResponse(
+            success=True,
+            message=f"Replaced '{old_str}' with '{new_str}' in {filepath}",
+        )
+
+    def _fuzzy_find_project_files(
+        self,
+        search_term: str,
+        repo_path: str,
+        threshold: int,
+        max_results: int,
+        include_paths: list[str] | None,
+        exclude_patterns: list[str] | None,
+        verbose: bool,
+    ) -> CoderResponse:
+        if not 0 <= threshold <= 100:
+            return CoderResponse(
+                success=False, error="Threshold must be between 0 and 100"
+            )
+
+        try:
+            repo = CoderTool.Repo(repo_path)
+        except CoderTool.InvalidGitRepositoryError:
+            return CoderResponse(
+                success=False, error=f"Not a git repository: {repo_path}"
+            )
+
+        tracked_files = repo.git.ls_files().splitlines()
+        untracked_files = list(repo.untracked_files)
+        all_files = tracked_files + untracked_files
+
+        if include_paths:
+            filtered = []
+            for pattern in include_paths:
+                filtered.extend(
+                    f for f in all_files if Path(repo_path, f).match(pattern)
+                )
+            all_files = filtered
+
+        DEFAULT_EXCLUDE = [
+            "*.pyc",
+            "__pycache__/*",
+            ".git/*",
+            "*.so",
+            "*.o",
+            "*.class",
+        ]
+        exclude_list = DEFAULT_EXCLUDE + (exclude_patterns or [])
+
+        def _exclude(f):
+            for pat in exclude_list:
+                if Path(repo_path, f).match(pat):
+                    return True
+            return False
+
+        all_files = [f for f in all_files if not _exclude(f)]
+
+        results = CoderTool.fuzz_process.extract(
+            search_term, all_files, limit=max_results
+        )
+        filtered_matches = [
+            (path, score) for (path, score) in results if score >= threshold
+        ]
+
+        if verbose:
+            info_sections = []
+
+            params_section = [
+                "## Search Parameters",
+                f"**Search Term**: `{search_term}`",
+                f"**Repository**: `{repo_path}`",
+                f"**Threshold**: {threshold}",
+                f"**Max Results**: {max_results}",
+            ]
+            if include_paths:
+                params_section.append("\n**Include Patterns**:")
+                for pat in include_paths:
+                    params_section.append(f"- `{pat}`")
+            if exclude_patterns:
+                params_section.append("\n**Exclude Patterns**:")
+                for pat in exclude_patterns:
+                    params_section.append(f"- `{pat}`")
+            info_sections.append("\n".join(params_section))
+
+            stats_section = [
+                "## Results Statistics",
+                f"**Total Files Scanned**: {len(all_files)}",
+                f"**Matches Found**: {len(filtered_matches)}",
+            ]
+            info_sections.append("\n".join(stats_section))
+
+            if filtered_matches:
+                results_section = ["## Top Matches"]
+                for path, score in filtered_matches[:5]:
+                    results_section.append(f"- `{path}` (score: {score})")
+                info_sections.append("\n".join(results_section))
+            else:
+                info_sections.append("## Results\n*No matches found*")
+
+            md_text = "\n\n".join(info_sections)
+            self.console.print(
+                CoderTool.Panel(
+                    CoderTool.Markdown(md_text),
+                    title="🔍 Fuzzy Find Results",
+                    border_style="bright_blue",
+                )
+            )
+
+        return CoderResponse(success=True, fuzzy_matches=filtered_matches)
+
+    def _run_shell_command(self, command: str, verbose: bool) -> CoderResponse:
+        if verbose and self.console and CoderTool.Panel:
+            self.console.print(
+                CoderTool.Panel(
+                    command, title="🐚 Shell", border_style="bright_yellow"
+                )
+            )
+
+        try:
+            out, code = run_command(["/bin/bash", "-c", command])
+            out = truncate_output(out)
+
+            if verbose and self.console:
+                style = 
"bold green" if code == 0 else "bold red" + self.console.print( + f"[{style}]Return code={code}[/{style}]\n{out}" + ) + + return CoderResponse( + success=(code == 0), + command_output=out, + return_code=code, + error=None if code == 0 else "Non-zero exit code", + ) + except Exception as e: + if verbose and self.console: + self.console.print(f"[red]{e}[/red]") + return CoderResponse(success=False, error=str(e)) + + def to_tool(self): + if self._tool is None: + + def coder_tool(**kwargs): + """ + Entrypoint for the CoderTool. Provide a CoderRequest as JSON. + Returns a dict matching CoderResponse. + """ + resp = self.handle_request(CoderRequest(**kwargs)) + return resp.model_dump() + + if self.system_tool_name != "coder_tool": + coder_tool.__name__ = self.system_tool_name + + self._tool = Tool( + func_callable=coder_tool, + request_options=CoderRequest, + ) + return self._tool diff --git a/lionagi/tools/coder/models.py b/lionagi/tools/coder/models.py new file mode 100644 index 000000000..0f4c15db5 --- /dev/null +++ b/lionagi/tools/coder/models.py @@ -0,0 +1,120 @@ +from enum import Enum +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class CoderAction(str, Enum): + """ + Sandbox or code-based ops: + - 'start_sandbox': Start a new sandbox + - 'stop_sandbox': Terminate sandbox + - 'list_sandboxes': Show active sandbox IDs + - 'run_code': Execute snippet + - 'install_pkg': Install a package in sandbox + - 'upload_file': Copy local file -> sandbox + - 'download_file': Copy sandbox file -> local + - 'shell_command': (Optionally) run local shell command + """ + + start_sandbox = "start_sandbox" + stop_sandbox = "stop_sandbox" + list_sandboxes = "list_sandboxes" + run_code = "run_code" + install_pkg = "install_pkg" + upload_file = "upload_file" + download_file = "download_file" + shell_command = "shell_command" + + +class CoderRequest(BaseModel): + """ + Request for CoderTool, describing sandbox ops or code execution. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "CoderTool: Manages code execution in a secure sandbox (like E2B). " + "Supports package install, file upload/download, optional local shell." + ) + } + ) + + action: CoderAction + + sandbox_id: Optional[str] = Field( + None, description="For referencing an existing sandbox if required." + ) + code: Optional[str] = Field( + None, description="For 'run_code'. The snippet of Python/JS, etc." + ) + language: Optional[str] = Field( + None, + description="For 'run_code'. E.g. 'python'. If None, default='python'.", + ) + pkg_manager: Optional[str] = Field( + None, description="For 'install_pkg'. E.g. 'pip','npm','apt','uv'." + ) + pkg_name: Optional[str] = Field( + None, description="For 'install_pkg'. The package name to install." + ) + local_path: Optional[str] = Field( + None, + description=( + "For 'upload_file','download_file'. The path on host. If None, invalid for those." + ), + ) + remote_path: Optional[str] = Field( + None, + description=( + "For 'upload_file','download_file'. The path in sandbox. If None, invalid for those." + ), + ) + template: Optional[str] = Field( + None, + description="For 'start_sandbox'. A custom environment ID if needed.", + ) + cpu: Optional[int] = Field( + None, + description="For 'start_sandbox'. Desired CPU cores if supported.", + ) + ram: Optional[int] = Field( + None, + description="For 'start_sandbox'. Desired RAM in MB if supported.", + ) + command: Optional[str] = Field( + None, + description="For 'shell_command'. 
A local shell command if you allow it.", + ) + verbose: bool = Field(False, description="If True, produce extra logs.") + + +class RunCodeResult(BaseModel): + stdout: Optional[str] = None + stderr: Optional[str] = None + error: Optional[str] = None + result_objects: Optional[List[Dict[str, Any]]] = None + + +class CoderResponse(BaseModel): + """ + Response from CoderTool. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "CoderTool Response: Contains sandbox IDs, run_code results, shell command outputs, etc." + ) + } + ) + + success: bool + error: Optional[str] = None + + sandbox_id: Optional[str] = None + sandbox_list: Optional[List[str]] = None + run_result: Optional[RunCodeResult] = None + command_output: Optional[str] = None + return_code: Optional[int] = None diff --git a/lionagi/tools/coder/utils.py b/lionagi/tools/coder/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/__init__.py b/lionagi/tools/providers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/gh_/__init__.py b/lionagi/tools/providers/gh_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/gh_/gh_tool.py b/lionagi/tools/providers/gh_/gh_tool.py new file mode 100644 index 000000000..9e32820e6 --- /dev/null +++ b/lionagi/tools/providers/gh_/gh_tool.py @@ -0,0 +1,441 @@ +import logging +from enum import Enum +from pathlib import Path + +from pydantic import BaseModel, Field + +from lionagi.operatives.action.tool import Tool + +from ...base import LionTool + + +def run_git_command( + args: list[str], cwd: Path | None = None +) -> tuple[str, int]: + """ + Run a git command (e.g. ["git", "clone", ...]) in the specified working directory. + Returns (combined_stdout_stderr, exit_code). + """ + import subprocess + + proc = subprocess.Popen( + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd + ) + out, _ = proc.communicate() + return out.decode("utf-8"), proc.returncode + + +class GithubAction(str, Enum): + """ + Enumerates common GitHub/Git actions: + - 'list_repos': List the user's repositories on GitHub + - 'clone_repo': Clone a repository locally using 'git clone' + - 'create_branch': Create a new branch locally (e.g. 'git checkout -b') + - 'checkout_branch': Switch to an existing branch locally + - 'commit_push': Stage files, commit, and push changes to remote + - 'open_pull_request': Open a new pull request via GitHub API + - 'list_prs': List open pull requests via GitHub API + - 'merge_pr': Merge (or close) an existing pull request via GitHub API + """ + + list_repos = "list_repos" + clone_repo = "clone_repo" + create_branch = "create_branch" + checkout_branch = "checkout_branch" + commit_push = "commit_push" + open_pull_request = "open_pull_request" + list_prs = "list_prs" + merge_pr = "merge_pr" + + +class GithubRequest(BaseModel): + """ + The input model for the GitHubTool. Depending on 'action', different fields apply. + """ + + action: GithubAction = Field( + ..., + description=( + "Which GitHub operation to perform:\n" + "- 'list_repos'\n" + "- 'clone_repo'\n" + "- 'create_branch'\n" + "- 'checkout_branch'\n" + "- 'commit_push'\n" + "- 'open_pull_request'\n" + "- 'list_prs'\n" + "- 'merge_pr'" + ), + ) + + github_token: str | None = Field( + None, + description=( + "GitHub personal access token, required for certain API calls (list_repos, open_pull_request, etc.). " + "If not needed for local Git ops, this can be omitted." 
+ ), + ) + + # Common fields + repo_url: str | None = Field( + None, + description=( + "For clone_repo, or if referencing a GitHub repo (e.g. 'https://github.com/user/repo.git')." + ), + ) + local_path: str | None = Field( + None, + description=( + "Local filesystem path to operate in (e.g., clone destination or existing repo path)." + ), + ) + branch_name: str | None = Field( + None, + description=( + "Name of the branch to create or checkout. Required for create_branch/checkout_branch." + ), + ) + commit_message: str | None = Field( + None, + description=("Commit message if action='commit_push'."), + ) + files_to_commit: list[str] | None = Field( + None, + description=( + "List of file paths to stage if action='commit_push'. If omitted, all changes are committed." + ), + ) + + # For pull requests + base_branch: str | None = Field( + None, + description=( + "The base branch (e.g. 'main') if opening or merging a PR." + ), + ) + pr_title: str | None = Field( + None, + description=( + "Title of the pull request if action='open_pull_request'." + ), + ) + pr_body: str | None = Field( + None, + description=( + "Body/description of the pull request if action='open_pull_request'." + ), + ) + pr_number: int | None = Field( + None, + description=( + "Pull request number if action='merge_pr', 'list_prs' (filter?), etc." + ), + ) + + +class RepoInfo(BaseModel): + """ + Minimal metadata about a GitHub repository. + """ + + name: str + full_name: str + private: bool + url: str + + +class PRInfo(BaseModel): + """ + Minimal metadata about a GitHub pull request. + """ + + number: int + title: str + user: str + url: str + + +class GithubResponse(BaseModel): + """ + The structured response from the GitHubTool, depending on the action. + """ + + success: bool = Field( + ..., + description="Indicates whether the requested GitHub action was successful.", + ) + error: str | None = Field( + None, + description="If success=False, this describes the error or failure reason.", + ) + + # For local git actions (clone_repo, create_branch, checkout_branch, commit_push): + output: str | None = Field( + None, + description=( + "Any combined stdout/stderr from local git commands if applicable." + ), + ) + + # For list_repos + repos: list[RepoInfo] | None = Field( + None, + description="Populated if action='list_repos' succeeded, listing the user's repositories.", + ) + + # For list_prs + prs: list[PRInfo] | None = Field( + None, + description="Populated if action='list_prs' succeeded, listing open pull requests.", + ) + + # For open_pull_request + pr_url: str | None = Field( + None, + description=( + "If action='open_pull_request' succeeded, URL of the newly created PR." + ), + ) + + +class GithubTool(LionTool): + """ + A tool for basic Git/GitHub operations. 
+ - local Git commands with run_git_command + - optional GitHub API calls with PyGithub or direct REST + """ + + is_lion_system_tool = True + system_tool_name = "github_tool" + + from lionagi.libs.package.imports import check_import + + Github = check_import("github", import_name="Github", pip_name="PyGithub") + + def __init__(self, github_token: str | None = None): + super().__init__() + self.github_token = github_token + if github_token: + self.client = GithubTool.Github(github_token) + else: + self.client = None + self._tool = None + + def handle_request(self, request: GithubRequest) -> GithubResponse: + if isinstance(request, dict): + request = GithubRequest(**request) + + action = request.action + if action == GithubAction.list_repos: + return self._list_repos(request) + elif action == GithubAction.clone_repo: + return self._clone_repo(request) + elif action == GithubAction.create_branch: + return self._create_branch(request) + elif action == GithubAction.checkout_branch: + return self._checkout_branch(request) + elif action == GithubAction.commit_push: + return self._commit_push(request) + elif action == GithubAction.open_pull_request: + return self._open_pull_request(request) + elif action == GithubAction.list_prs: + return self._list_prs(request) + elif action == GithubAction.merge_pr: + return self._merge_pr(request) + + return GithubResponse(success=False, error="Unknown action") + + # ---------------------------------- + # LOCAL GIT COMMANDS + # ---------------------------------- + + def _clone_repo(self, request: GithubRequest) -> GithubResponse: + if not request.repo_url or not request.local_path: + return GithubResponse( + success=False, error="repo_url and local_path are required" + ) + + cmd = ["git", "clone", request.repo_url, request.local_path] + out, code = run_git_command(cmd) + if code != 0: + return GithubResponse(success=False, error=out) + return GithubResponse(success=True, output=out) + + def _create_branch(self, request: GithubRequest) -> GithubResponse: + if not request.local_path or not request.branch_name: + return GithubResponse( + success=False, error="local_path and branch_name are required" + ) + cmd = ["git", "checkout", "-b", request.branch_name] + out, code = run_git_command(cmd, cwd=Path(request.local_path)) + if code != 0: + return GithubResponse(success=False, error=out) + return GithubResponse(success=True, output=out) + + def _checkout_branch(self, request: GithubRequest) -> GithubResponse: + if not request.local_path or not request.branch_name: + return GithubResponse( + success=False, error="local_path and branch_name are required" + ) + cmd = ["git", "checkout", request.branch_name] + out, code = run_git_command(cmd, cwd=Path(request.local_path)) + if code != 0: + return GithubResponse(success=False, error=out) + return GithubResponse(success=True, output=out) + + def _commit_push(self, request: GithubRequest) -> GithubResponse: + if not request.local_path or not request.commit_message: + return GithubResponse( + success=False, + error="local_path and commit_message are required", + ) + + cwd = Path(request.local_path) + if request.files_to_commit: + cmd_add = ["git", "add"] + request.files_to_commit + else: + cmd_add = ["git", "add", "--all"] + out_add, code_add = run_git_command(cmd_add, cwd=cwd) + if code_add != 0: + return GithubResponse(success=False, error=out_add) + + cmd_commit = ["git", "commit", "-m", request.commit_message] + out_commit, code_commit = run_git_command(cmd_commit, cwd=cwd) + if code_commit != 0: + return 
GithubResponse(success=False, error=out_commit) + + cmd_push = ["git", "push", "origin", "HEAD"] + out_push, code_push = run_git_command(cmd_push, cwd=cwd) + if code_push != 0: + return GithubResponse(success=False, error=out_push) + + combined = f"{out_add}\n{out_commit}\n{out_push}" + return GithubResponse(success=True, output=combined) + + # ---------------------------------- + # GITHUB API CALLS + # ---------------------------------- + + def _list_repos(self, request: GithubRequest) -> GithubResponse: + if not self.client: + return GithubResponse( + success=False, + error="GitHub client not initialized (no token).", + ) + + try: + user = self.client.get_user() + repos_data = [] + for repo in user.get_repos(): + repos_data.append( + RepoInfo( + name=repo.name, + full_name=repo.full_name, + private=repo.private, + url=repo.html_url, + ) + ) + return GithubResponse(success=True, repos=repos_data) + except Exception as e: + return GithubResponse(success=False, error=str(e)) + + def _open_pull_request(self, request: GithubRequest) -> GithubResponse: + if not self.client: + return GithubResponse( + success=False, error="GitHub client not initialized." + ) + if ( + not request.repo_url + or not request.branch_name + or not request.base_branch + or not request.pr_title + ): + return GithubResponse( + success=False, + error="repo_url, branch_name, base_branch, pr_title required", + ) + + full_name = request.repo_url.replace( + "https://github.com/", "" + ).replace(".git", "") + try: + repo = self.client.get_repo(full_name) + pr = repo.create_pull( + title=request.pr_title, + body=request.pr_body or "", + head=request.branch_name, + base=request.base_branch, + ) + return GithubResponse(success=True, pr_url=pr.html_url) + except Exception as e: + logging.error(f"Error creating PR: {e}") + return GithubResponse(success=False, error=str(e)) + + def _list_prs(self, request: GithubRequest) -> GithubResponse: + if not self.client: + return GithubResponse( + success=False, error="GitHub client not initialized." + ) + if not request.repo_url: + return GithubResponse(success=False, error="repo_url required") + + full_name = request.repo_url.replace( + "https://github.com/", "" + ).replace(".git", "") + try: + repo = self.client.get_repo(full_name) + open_prs = repo.get_pulls(state="open") + data = [] + for pr in open_prs: + data.append( + PRInfo( + number=pr.number, + title=pr.title, + user=pr.user.login, + url=pr.html_url, + ) + ) + return GithubResponse(success=True, prs=data) + except Exception as e: + return GithubResponse(success=False, error=str(e)) + + def _merge_pr(self, request: GithubRequest) -> GithubResponse: + if not self.client: + return GithubResponse( + success=False, error="GitHub client not initialized." + ) + if not request.repo_url or not request.pr_number: + return GithubResponse( + success=False, error="repo_url and pr_number required" + ) + + full_name = request.repo_url.replace( + "https://github.com/", "" + ).replace(".git", "") + try: + repo = self.client.get_repo(full_name) + pull = repo.get_pull(request.pr_number) + res = pull.merge() + return GithubResponse(success=True, output=str(res)) + except Exception as e: + return GithubResponse(success=False, error=str(e)) + + def to_tool(self): + if self._tool is None: + + def github_tool(**kwargs): + """ + Unified tool interface for GitHub operations. 
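+                Accepts GithubRequest fields as keyword arguments and returns
+                a dict matching GithubResponse.
+
+                Example call (illustrative sketch; the repo URL and local
+                path are placeholders):
+
+                    github_tool(
+                        action="clone_repo",
+                        repo_url="https://github.com/user/repo.git",
+                        local_path="/tmp/repo",
+                    )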
+ """ + return self.handle_request( + GithubRequest(**kwargs) + ).model_dump() + + if self.system_tool_name != "github_tool": + github_tool.__name__ = self.system_tool_name + + self._tool = Tool( + func_callable=github_tool, + request_options=GithubRequest, + ) + return self._tool diff --git a/lionagi/tools/providers/gh_/models.py b/lionagi/tools/providers/gh_/models.py new file mode 100644 index 000000000..a1e034146 --- /dev/null +++ b/lionagi/tools/providers/gh_/models.py @@ -0,0 +1,117 @@ +from enum import Enum +from typing import List, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class GithubAction(str, Enum): + """ + Local Git or GitHub API calls: + - 'list_repos': Show user GH repos + - 'clone_repo': Clone a remote repo locally + - 'create_branch': New branch locally + - 'checkout_branch': Switch branch + - 'commit_push': Stage & commit local changes, push to remote + - 'open_pull_request': Create new PR + - 'list_prs': List open PRs + - 'merge_pr': Merge or close a PR + """ + + list_repos = "list_repos" + clone_repo = "clone_repo" + create_branch = "create_branch" + checkout_branch = "checkout_branch" + commit_push = "commit_push" + open_pull_request = "open_pull_request" + list_prs = "list_prs" + merge_pr = "merge_pr" + + +class GithubRequest(BaseModel): + """ + Request for GithubTool, describing local or remote git ops. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "GithubTool: For local Git commands and GitHub API tasks. " + "Use 'repo_url' for remote repos, 'local_path' for local repo dir." + ) + } + ) + + action: GithubAction + + github_token: Optional[str] = Field( + None, description="Personal GH token if needed for GH API calls." + ) + repo_url: Optional[str] = Field( + None, + description="For 'clone_repo' or referencing GH repo. E.g. 'https://github.com/user/repo.git'.", + ) + local_path: Optional[str] = Field( + None, + description="For local Git commands, the local repo path on disk.", + ) + branch_name: Optional[str] = Field( + None, + description="For 'create_branch','checkout_branch'. The branch name.", + ) + commit_message: Optional[str] = Field( + None, description="For 'commit_push'. The commit message." + ) + files_to_commit: Optional[List[str]] = Field( + None, description="For 'commit_push'. If None, stage all changes." + ) + base_branch: Optional[str] = Field( + None, + description="For 'open_pull_request'. The base branch (e.g. 'main').", + ) + pr_title: Optional[str] = Field( + None, description="For 'open_pull_request'. The new PR's title." + ) + pr_body: Optional[str] = Field( + None, description="For 'open_pull_request'. The PR body." + ) + pr_number: Optional[int] = Field( + None, + description="For 'merge_pr' or 'list_prs'. The PR # if filtering or merging.", + ) + + +class RepoInfo(BaseModel): + name: str + full_name: str + private: bool + url: str + + +class PRInfo(BaseModel): + number: int + title: str + user: str + url: str + + +class GithubResponse(BaseModel): + """ + Response from GithubTool. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "GithubTool Response: Contains local Git outputs or GH API results (repos, PR info, etc.)." + ) + } + ) + + success: bool + error: Optional[str] = Field( + None, description="If success=False, reason for failure." 
+ ) + output: Optional[str] = None + repos: Optional[List[RepoInfo]] = None + prs: Optional[List[PRInfo]] = None + pr_url: Optional[str] = None diff --git a/lionagi/tools/providers/gh_/utils.py b/lionagi/tools/providers/gh_/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/mcp_/__init__.py b/lionagi/tools/providers/mcp_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/mcp_/mcp_tool.py b/lionagi/tools/providers/mcp_/mcp_tool.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/mcp_/models.py b/lionagi/tools/providers/mcp_/models.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/providers/mcp_/utils.py b/lionagi/tools/providers/mcp_/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/reader.py b/lionagi/tools/reader.py deleted file mode 100644 index 42ebd2f8d..000000000 --- a/lionagi/tools/reader.py +++ /dev/null @@ -1,244 +0,0 @@ -import tempfile -from enum import Enum - -from pydantic import BaseModel, Field, field_validator - -from lionagi.operatives.action.tool import Tool -from lionagi.utils import to_num - -from .base import LionTool - - -class ReaderAction(str, Enum): - """ - This enumeration indicates the *type* of action the LLM wants to perform. - - 'open': Convert a file/URL to text and store it internally for partial reads - - 'read': Return a partial slice of the already-opened doc - """ - - open = "open" - read = "read" - - -class ReaderRequest(BaseModel): - """ - The request model for the 'ReaderTool'. - It indicates: - - whether we are 'open'-ing a doc or 'read'-ing from a doc - - which file/URL we want to open (if action='open') - - which doc_id and offsets we want to read (if action='read') - """ - - action: ReaderAction = Field( - ..., - description=( - "Action to perform. Must be one of: " - "- 'open': Convert a file/URL to text and store it internally for partial reads. " - "- 'read': Return a partial slice of the already-opened doc." - ), - ) - - path_or_url: str | None = Field( - None, - description=( - "Local file path or remote URL to open. This field is REQUIRED if action='open'. " - "If action='read', leave it None." - ), - ) - - doc_id: str | None = Field( - None, - description=( - "Unique ID referencing a previously opened document. " - "This field is REQUIRED if action='read'. If action='open', leave it None." - ), - ) - - start_offset: int | None = Field( - None, - description=( - "Character start offset in the doc for partial reading. " - "If omitted or None, defaults to 0. Only used if action='read'." - ), - ) - - end_offset: int | None = Field( - None, - description=( - "Character end offset in the doc for partial reading. " - "If omitted or None, we read until the document's end. Only used if action='read'." - ), - ) - - @field_validator("start_offset", "end_offset", mode="before") - def _validate_offsets(cls, v): - try: - return to_num(v, num_type=int) - except ValueError: - return None - - -class DocumentInfo(BaseModel): - """ - Returned info when we 'open' a doc. - doc_id: The unique string to reference this doc in subsequent 'read' calls - length: The total character length of the converted text - """ - - doc_id: str - length: int | None = None - - -class PartialChunk(BaseModel): - """ - Represents a partial slice of text from [start_offset..end_offset). 
- """ - - start_offset: int | None = None - end_offset: int | None = None - content: str | None = None - - -class ReaderResponse(BaseModel): - """ - The response from the 'ReaderTool'. - - If action='open' succeeded, doc_info is filled (doc_id & length). - - If action='read' succeeded, chunk is filled (the partial text). - - If failure occurs, success=False & error hold details. - """ - - success: bool = Field( - ..., - description=( - "Indicates if the requested action was performed successfully." - ), - ) - error: str | None = Field( - None, - description=("Describes any error that occurred, if success=False."), - ) - doc_info: DocumentInfo | None = Field( - None, - description=( - "Populated only if action='open' succeeded, letting the LLM know doc_id & total length." - ), - ) - chunk: PartialChunk | None = Field( - None, - description=( - "Populated only if action='read' succeeded, providing the partial slice of text." - ), - ) - - -class ReaderTool(LionTool): - """ - A single tool that the LLM can call with ReaderRequest to either: - - open a doc (File/URL) -> returns doc_id, doc length - - read partial text from doc -> returns chunk - """ - - is_lion_system_tool = True - system_tool_name = "reader_tool" - - from lionagi.libs.package.imports import check_import - - DocumentConverter = check_import( - "docling", - module_name="document_converter", - import_name="DocumentConverter", - ) - - def __init__(self): - super().__init__() - self.converter = ReaderTool.DocumentConverter() - self.documents = {} # doc_id -> (temp_file_path, doc_length) - self._tool = None - - def handle_request(self, request: ReaderRequest) -> ReaderResponse: - """ - A function that takes ReaderRequest to either: - - open a doc (File/URL) -> returns doc_id, doc length - - read partial text from doc -> returns chunk - """ - if isinstance(request, dict): - request = ReaderRequest(**request) - if request.action == "open": - return self._open_doc(request.path_or_url) - elif request.action == "read": - return self._read_doc( - request.doc_id, request.start_offset, request.end_offset - ) - else: - return ReaderResponse(success=False, error="Unknown action type") - - def _open_doc(self, source: str) -> ReaderResponse: - try: - result = self.converter.convert(source) - text = result.document.export_to_markdown() - except Exception as e: - return ReaderResponse( - success=False, error=f"Conversion error: {str(e)}" - ) - - doc_id = f"DOC_{abs(hash(source))}" - temp_file = tempfile.NamedTemporaryFile( - delete=False, mode="w", encoding="utf-8" - ) - temp_file.write(text) - doc_len = len(text) - temp_file.close() - - # store info - self.documents[doc_id] = (temp_file.name, doc_len) - - return ReaderResponse( - success=True, doc_info=DocumentInfo(doc_id=doc_id, length=doc_len) - ) - - def _read_doc(self, doc_id: str, start: int, end: int) -> ReaderResponse: - if doc_id not in self.documents: - return ReaderResponse( - success=False, error="doc_id not found in memory" - ) - - path, length = self.documents[doc_id] - # clamp offsets - s = max(0, start if start is not None else 0) - e = min(length, end if end is not None else length) - - try: - with open(path, encoding="utf-8") as f: - f.seek(s) - content = f.read(e - s) - except Exception as ex: - return ReaderResponse( - success=False, error=f"Read error: {str(ex)}" - ) - - return ReaderResponse( - success=True, - chunk=PartialChunk(start_offset=s, end_offset=e, content=content), - ) - - def to_tool(self): - if self._tool is None: - - def reader_tool(**kwargs): - """ - A 
function that takes ReaderRequest to either: - - open a doc (File/URL) -> returns doc_id, doc length - - read partial text from doc -> returns chunk - """ - return self.handle_request( - ReaderRequest(**kwargs) - ).model_dump() - - if self.system_tool_name != "reader_tool": - reader_tool.__name__ = self.system_tool_name - - self._tool = Tool( - func_callable=reader_tool, - request_options=ReaderRequest, - ) - return self._tool diff --git a/lionagi/tools/reader/__init__.py b/lionagi/tools/reader/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/reader/models.py b/lionagi/tools/reader/models.py new file mode 100644 index 000000000..fd07494f5 --- /dev/null +++ b/lionagi/tools/reader/models.py @@ -0,0 +1,101 @@ +from enum import Enum + +from pydantic import BaseModel, ConfigDict, Field + + +class ReaderAction(str, Enum): + """ + The enumerated actions for reading local files or in-memory docs: + - 'open_doc': Convert a file/URL to text in memory, return doc_id + - 'read_doc': Retrieve partial text from doc by offsets + - 'search_doc': Find substring in doc text, return positions + - 'list_docs': Show doc_ids stored in memory + - 'close_doc': Remove a doc from memory + """ + + open_doc = "open_doc" + read_doc = "read_doc" + search_doc = "search_doc" + list_docs = "list_docs" + close_doc = "close_doc" + + +class ReaderRequest(BaseModel): + """ + Request model for ReaderTool. The LLM sets 'action' + relevant fields. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "ReaderTool: Use these actions to read text from local files or memory. " + "No chunking or writing is done here—this is read-only usage." + ) + } + ) + + action: ReaderAction = Field( + ..., description="Which read-oriented action to perform." + ) + + path_or_url: str | None = Field( + None, + description=( + "If action='open_doc', a local file path or URL to convert into doc text in memory." + ), + ) + doc_id: str | None = Field( + None, + description=( + "For 'read_doc','search_doc','close_doc'. The ID referencing an opened doc in memory." + ), + ) + start_offset: int | None = Field( + None, + description=( + "For 'read_doc'. The start offset in doc text. If None, default=0." + ), + ) + end_offset: int | None = Field( + None, + description=( + "For 'read_doc'. The end offset in doc text. If None, read until doc end." + ), + ) + search_query: str | None = Field( + None, + description=( + "For 'search_doc'. The substring to find. If None, invalid for that action." + ), + ) + + +class ReaderResponse(BaseModel): + """ + Response from the ReaderTool, capturing read results or doc listings. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "ReaderTool Response: Contains partial content, search results, doc list, etc." + ) + } + ) + + success: bool + error: str | None = Field( + None, description="If success=False, reason for failure." 
+ ) + + doc_id: str | None = None + doc_list: list[str] | None = None + + partial_content: str | None = Field( + None, + description="If 'read_doc' succeeds, the text slice read from [start_offset..end_offset).", + ) + positions: list[int] | None = Field( + None, + description="If 'search_doc', the positions in text where search_query is found.", + ) diff --git a/lionagi/tools/reader/reader_tool.py b/lionagi/tools/reader/reader_tool.py new file mode 100644 index 000000000..72b29a4f8 --- /dev/null +++ b/lionagi/tools/reader/reader_tool.py @@ -0,0 +1,371 @@ +import tempfile +from enum import Enum +from typing import Any + +from pydantic import BaseModel, Field, field_validator, model_validator + +from lionagi.libs.file.chunk import Chunk, chunk_content +from lionagi.operatives.action.tool import Tool +from lionagi.utils import to_num + +from ..base import LionTool +from .models import ( + ChunkMetadata, + DocumentInfo, + ReaderAction, + ReaderRequest, + ReaderResponse, + SearchResult, +) + + +class ReaderTool(LionTool): + """ + A ReaderTool that the LLM can call to open docs, read them, search, + chunk them in memory, and read specific chunks. + """ + + is_lion_system_tool = True + system_tool_name = "reader_tool" + + from lionagi.libs.package.imports import check_import + + DocumentConverter = check_import( + "docling", + module_name="document_converter", + import_name="DocumentConverter", + ) + + def __init__(self): + super().__init__() + self.converter = ReaderTool.DocumentConverter() + # We'll store each doc's data in a dict: + # documents[doc_id] = { + # "text_path": "/temp/file", + # "length": int, + # "chunks": [ChunkMetadata, ...] + # } + self.documents: dict[str, dict[str, Any]] = {} + self._tool = None + + def handle_request(self, request: ReaderRequest) -> ReaderResponse: + if isinstance(request, dict): + request = ReaderRequest(**request) + + action = request.action + + if action == ReaderAction.open: + if not request.path_or_url: + return ReaderResponse( + success=False, error="path_or_url is required for 'open'" + ) + return self._open_doc(request.path_or_url) + + elif action == ReaderAction.read: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'read'" + ) + return self._read_doc( + request.doc_id, request.start_offset, request.end_offset + ) + + elif action == ReaderAction.search: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'search'" + ) + if not request.search_query: + return ReaderResponse( + success=False, + error="search_query is required for 'search'", + ) + return self._search_doc(request.doc_id, request.search_query) + + elif action == ReaderAction.list_docs: + return self._list_docs() + + elif action == ReaderAction.close: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'close'" + ) + return self._close_doc(request.doc_id) + + elif action == ReaderAction.chunk_doc: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'chunk_doc'" + ) + return self._chunk_doc( + doc_id=request.doc_id, + chunk_size=request.chunk_size, + overlap=request.overlap, + threshold=request.threshold, + ) + + elif action == ReaderAction.list_chunks: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'list_chunks'" + ) + return self._list_chunks(request.doc_id) + + elif action == ReaderAction.read_chunk: + if not request.doc_id: + return ReaderResponse( + 
success=False, error="doc_id is required for 'read_chunk'" + ) + if request.chunk_index is None: + return ReaderResponse( + success=False, + error="chunk_index is required for 'read_chunk'", + ) + return self._read_chunk(request.doc_id, request.chunk_index) + + elif action == ReaderAction.read_chunks: + if not request.doc_id: + return ReaderResponse( + success=False, error="doc_id is required for 'read_chunks'" + ) + if not request.chunk_indexes: + return ReaderResponse( + success=False, + error="chunk_indexes is required for 'read_chunks'", + ) + return self._read_chunks(request.doc_id, request.chunk_indexes) + + else: + return ReaderResponse(success=False, error="Unknown action type") + + # ------------------- + # Core doc operations + # ------------------- + + def _open_doc(self, source: str) -> ReaderResponse: + try: + result = self.converter.convert(source) + text = result.document.export_to_markdown() + if not text.strip(): + return ReaderResponse( + success=False, + error="Parsed document text is empty. Check if docling can parse this PDF.", + ) + except Exception as e: + return ReaderResponse( + success=False, error=f"Conversion error: {str(e)}" + ) + + doc_id = f"DOC_{abs(hash(source))}" + temp_file = tempfile.NamedTemporaryFile( + delete=False, mode="w", encoding="utf-8" + ) + temp_file.write(text) + temp_file.close() + + self.documents[doc_id] = { + "text_path": temp_file.name, + "length": len(text), + "chunks": [], + } + + return ReaderResponse( + success=True, + doc_info=DocumentInfo(doc_id=doc_id, length=len(text)), + ) + + def _read_doc( + self, doc_id: str, start: int | None, end: int | None + ) -> ReaderResponse: + if doc_id not in self.documents: + return ReaderResponse( + success=False, error="doc_id not found in memory" + ) + + doc_data = self.documents[doc_id] + path = doc_data["text_path"] + length = doc_data["length"] + + s = max(0, start if start is not None else 0) + e = min(length, end if end is not None else length) + + try: + with open(path, encoding="utf-8") as f: + f.seek(s) + content = f.read(e - s) + except Exception as ex: + return ReaderResponse( + success=False, error=f"Read error: {str(ex)}" + ) + + return ReaderResponse( + success=True, + chunk=PartialChunk(start_offset=s, end_offset=e, content=content), + ) + + def _search_doc(self, doc_id: str, query: str) -> ReaderResponse: + if doc_id not in self.documents: + return ReaderResponse( + success=False, error="doc_id not found in memory" + ) + + doc_data = self.documents[doc_id] + path = doc_data["text_path"] + try: + with open(path, encoding="utf-8") as f: + text = f.read() + except Exception as ex: + return ReaderResponse( + success=False, error=f"Search read error: {str(ex)}" + ) + + positions = [] + start_index = 0 + while True: + pos = text.find(query, start_index) + if pos == -1: + break + positions.append(pos) + start_index = pos + 1 + + return ReaderResponse( + success=True, search_result=SearchResult(positions=positions) + ) + + def _list_docs(self) -> ReaderResponse: + return ReaderResponse( + success=True, doc_list=list(self.documents.keys()) + ) + + def _close_doc(self, doc_id: str) -> ReaderResponse: + if doc_id not in self.documents: + return ReaderResponse( + success=False, error="doc_id not found in memory" + ) + del self.documents[doc_id] + return ReaderResponse(success=True) + + # ---------------------- + # Chunk-based operations + # ---------------------- + + def _chunk_doc( + self, doc_id: str, chunk_size: int, overlap: float, threshold: int + ) -> ReaderResponse: + doc_data = 
self.documents.get(doc_id)
+        if not doc_data:
+            return ReaderResponse(
+                success=False, error="doc_id not found in memory"
+            )
+
+        path = doc_data["text_path"]
+        try:
+            with open(path, encoding="utf-8") as f:
+                text = f.read()
+        except Exception as e:
+            return ReaderResponse(
+                success=False, error=f"Error reading doc: {str(e)}"
+            )
+
+        chunk_texts = chunk_content(
+            text, chunk_size=chunk_size, overlap=overlap, threshold=threshold
+        )
+
+        # Optional debug print for chunking diagnostics:
+        # print(f"[DEBUG] chunk_content returned {len(chunk_texts)} chunk(s).")
+
+        if not chunk_texts:
+            return ReaderResponse(
+                success=False,
+                error=(
+                    "chunk_content returned an empty list of chunks. Possibly "
+                    "the text is empty or chunking logic needs adjustment."
+                ),
+            )
+
+        chunk_meta_list: list[ChunkMetadata] = []
+
+        # Approximate step between chunk starts; start/end offsets are
+        # estimates when overlap > 0.
+        step = int(chunk_size - (overlap * chunk_size))
+        current_start = 0
+
+        for i, ctext in enumerate(chunk_texts):
+            c_len = len(ctext)
+            chunk_meta_list.append(
+                ChunkMetadata(
+                    index=i,
+                    start=current_start,
+                    end=current_start + c_len,
+                    text=ctext,
+                )
+            )
+            current_start += step
+
+        doc_data["chunks"] = chunk_meta_list
+        return ReaderResponse(success=True, chunk_list=chunk_meta_list)
+
+    def _list_chunks(self, doc_id: str) -> ReaderResponse:
+        doc_data = self.documents.get(doc_id)
+        if not doc_data:
+            return ReaderResponse(
+                success=False, error="doc_id not found in memory"
+            )
+        return ReaderResponse(success=True, chunk_list=doc_data["chunks"])
+
+    def _read_chunk(self, doc_id: str, chunk_index: int) -> ReaderResponse:
+        doc_data = self.documents.get(doc_id)
+        if not doc_data:
+            return ReaderResponse(
+                success=False, error="doc_id not found in memory"
+            )
+
+        chunks = doc_data.get("chunks", [])
+        if chunk_index < 0 or chunk_index >= len(chunks):
+            return ReaderResponse(
+                success=False, error="chunk_index out of range"
+            )
+
+        return ReaderResponse(success=True, chunks_read=[chunks[chunk_index]])
+
+    def _read_chunks(self, doc_id: str, indexes: list[int]) -> ReaderResponse:
+        doc_data = self.documents.get(doc_id)
+        if not doc_data:
+            return ReaderResponse(
+                success=False, error="doc_id not found in memory"
+            )
+
+        chunks = doc_data.get("chunks", [])
+        result = []
+        for i in indexes:
+            if 0 <= i < len(chunks):
+                result.append(chunks[i])
+            else:
+                # silently skip out-of-range indexes
+                pass
+
+        return ReaderResponse(success=True, chunks_read=result)
+
+    def to_tool(self):
+        if self._tool is None:
+
+            def reader_tool(**kwargs):
+                """
+                The single entrypoint for the ReaderTool. Accepts the fields
+                of a ReaderRequest (JSON) as keyword arguments. Do not invent
+                new action names; choose one of the enumerated actions (e.g.
+                'read_chunks') and configure the corresponding request fields.
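+
+                Example (illustrative sketch; the doc_id comes from a prior
+                'open' response, and the numeric values are placeholders):
+
+                    reader_tool(action="open", path_or_url="report.pdf")
+                    reader_tool(action="read", doc_id="DOC_123",
+                                start_offset=0, end_offset=500)
+                    reader_tool(action="chunk_doc", doc_id="DOC_123",
+                                chunk_size=1000, overlap=0.1, threshold=200)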
+ """ + return self.handle_request( + ReaderRequest(**kwargs) + ).model_dump() + + if self.system_tool_name != "reader_tool": + reader_tool.__name__ = self.system_tool_name + + self._tool = Tool( + func_callable=reader_tool, + request_options=ReaderRequest, + ) + return self._tool diff --git a/lionagi/tools/reader/utils.py b/lionagi/tools/reader/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/researcher/__init__.py b/lionagi/tools/researcher/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/researcher/models.py b/lionagi/tools/researcher/models.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/researcher/researcher_tool.py b/lionagi/tools/researcher/researcher_tool.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/researcher/utils.py b/lionagi/tools/researcher/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/types.py b/lionagi/tools/types.py index a18f595cf..5d00f86d0 100644 --- a/lionagi/tools/types.py +++ b/lionagi/tools/types.py @@ -1,3 +1,11 @@ -from .reader import ReaderTool +from .coder.coder import CoderTool +from .providers.gh_.gh_tool import GithubTool +from .reader.reader_tool import ReaderTool +from .writer.writer import WriterTool -__all__ = ("ReaderTool",) +__all__ = ( + "GithubTool", + "CoderTool", + "WriterTool", + "ReaderTool", +) diff --git a/lionagi/tools/writer/__init__.py b/lionagi/tools/writer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/writer/models.py b/lionagi/tools/writer/models.py new file mode 100644 index 000000000..d85587d46 --- /dev/null +++ b/lionagi/tools/writer/models.py @@ -0,0 +1,85 @@ +from enum import Enum + +from pydantic import BaseModel, ConfigDict, Field + + +class WriterAction(str, Enum): + """ + The enumerated actions for writing or modifying local files/docs: + - 'open_doc': Create or open doc in memory for writing + - 'write_doc': Overwrite/append partial text + - 'list_docs': Show doc_ids stored in memory + - 'close_doc': Remove doc from memory + - 'save_file': Save text to disk within allowed root + """ + + open_doc = "open_doc" + write_doc = "write_doc" + list_docs = "list_docs" + close_doc = "close_doc" + save_file = "save_file" + + +class WriterRequest(BaseModel): + """ + Request model for WriterTool. LLM picks 'action' + relevant fields. + """ + + model_config = ConfigDict( + json_schema_extra={ + "description": ( + "WriterTool: Use these actions to create or modify docs in memory, " + "or save text to disk." + ) + } + ) + + action: WriterAction = Field( + ..., description="Which write-oriented action to perform." + ) + + path: str | None = Field( + None, + description=( + "If action='open_doc', an existing file path to load text from. If not found or None, create empty doc." + ), + ) + doc_id: str | None = Field( + None, + description="For 'write_doc','close_doc'. The in-memory doc ID to modify or remove.", + ) + content: str | None = Field( + None, + description="For 'write_doc' or 'save_file'. The text to be written.", + ) + start_offset: int | None = Field( + None, + description="For 'write_doc'. The start offset if partially overwriting. If None, append at doc end.", + ) + end_offset: int | None = Field( + None, + description="For 'write_doc'. The end offset. If None, start_offset+len(content).", + ) + directory: str | None = Field( + None, + description="For 'save_file'. 
The directory under allowed root to write the file.", + ) + filename: str | None = Field( + None, description="For 'save_file'. The filename in that directory." + ) + + +class WriterResponse(BaseModel): + """ + Response from the WriterTool, showing doc state or file path saved. + """ + + success: bool + error: str | None = Field( + None, description="If success=False, reason for failure." + ) + + doc_id: str | None = None + doc_list: list[str] | None = None + updated_length: int | None = None + saved_path: str | None = None diff --git a/lionagi/tools/writer/utils.py b/lionagi/tools/writer/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/lionagi/tools/writer/writer.py b/lionagi/tools/writer/writer.py new file mode 100644 index 000000000..c71bd82d8 --- /dev/null +++ b/lionagi/tools/writer/writer.py @@ -0,0 +1,425 @@ +import logging +import tempfile +from enum import Enum +from pathlib import Path + +from pydantic import BaseModel, Field, field_validator + +from lionagi.operatives.action.tool import Tool +from lionagi.utils import to_num + +from ..base import LionTool + + +class WriterAction(str, Enum): + """ + This enumeration indicates the *type* of action the LLM wants to perform: + - 'open': Create or open a document in memory for writing + - 'write': Write or append text to an opened document (partial offsets) + - 'list_docs': List doc_ids in memory + - 'close': Remove a previously opened doc from memory + - 'save_file': Save text to a file on disk (restricted to allowed directory) + - 'save_chunks': Save a list of chunk objects to a single file (also restricted) + """ + + open = "open" + write = "write" + list_docs = "list_docs" + close = "close" + save_file = "save_file" + save_chunks = "save_chunks" + + +class WriterRequest(BaseModel): + """ + The request model for the 'WriterTool'. + It indicates: + - action: one of ('open', 'write', 'list_docs', 'close', 'save_file', 'save_chunks') + - path: path to open if action='open' (optional) + - doc_id: required if action='write' or 'close' + - content: text to write if action='write' or 'save_file' + - start_offset, end_offset: partial overwrite range if action='write' + - directory, filename: used if action='save_file' or 'save_chunks' + - chunks: list of chunk data to save if action='save_chunks' + """ + + action: WriterAction = Field( + ..., + description=( + "Action to perform. Must be one of: " + "- 'open': Open/create a doc in memory. " + "- 'write': Write partial text to a doc. " + "- 'list_docs': List open doc_ids. " + "- 'close': Close a doc. " + "- 'save_file': Save text to disk (allowed directory only). " + "- 'save_chunks': Save chunk objects to disk." + ), + ) + + path: str | None = Field( + None, + description=( + "Local file path to open for writing. If not provided, we create an " + "empty doc in memory. Used only if action='open'." + ), + ) + + doc_id: str | None = Field( + None, + description=( + "Unique ID referencing a previously opened document. " + "Required for 'write' or 'close'. Not used if action='open' or " + "'list_docs' or 'save_file'/'save_chunks'." + ), + ) + + content: str | None = Field( + None, + description=( + "Text to write if action='write' or 'save_file'. If action='save_file', " + "this is the file's content." + ), + ) + + start_offset: int | None = Field( + None, + description=( + "Character start offset in the doc for partial writing. " + "If omitted or None, append at the end. Used only if action='write'." 
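+            # Offset semantics, illustrated (values assumed): with doc text
+            # "hello world", content="HELLO" and start_offset=0 overwrite to
+            # "HELLO world" (end_offset defaults to start_offset + len(content));
+            # start_offset=None simply appends.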
+ ), + ) + + end_offset: int | None = Field( + None, + description=( + "Character end offset in the doc for partial overwrite. " + "If omitted or None, default is start_offset + len(content). " + "Only used if action='write'." + ), + ) + + directory: str | None = Field( + None, + description=( + "Directory in which to save the file or chunks if action='save_file' or 'save_chunks'. " + "Must be within the allowed root directory." + ), + ) + + filename: str | None = Field( + None, + description=("Filename used if action='save_file' or 'save_chunks'."), + ) + + chunks: list | None = Field( + None, + description=( + "List of chunk objects to save if action='save_chunks'. " + "Each chunk could be a dict with text, metadata, etc." + ), + ) + + @field_validator("start_offset", "end_offset", mode="before") + def _validate_offsets(cls, v): + try: + return to_num(v, num_type=int) + except ValueError: + return None + + +class WriterDocumentInfo(BaseModel): + """ + Returned info when we 'open' a doc for writing. + doc_id: The unique ID to reference this doc in subsequent 'write' calls + length: The total character length of the doc in memory + """ + + doc_id: str + length: int + + +class WriterResponse(BaseModel): + """ + The response from the 'WriterTool'. + + - If action='open', doc_info is set on success. + - If action='write', updated_length is set on success. + - If action='list_docs', doc_list is set. + - If action='save_file' or 'save_chunks', saved_path is set if successful. + - If failure, success=False & error holds details. + """ + + success: bool = Field( + ..., + description="Indicates if the requested action was performed successfully.", + ) + error: str | None = Field( + None, + description="Describes any error that occurred, if success=False.", + ) + + doc_info: WriterDocumentInfo | None = None + updated_length: int | None = None + doc_list: list[str] | None = None + + saved_path: str | None = Field( + None, + description="Path where the file or chunks were saved, if action='save_file'/'save_chunks'.", + ) + + +class WriterTool(LionTool): + """ + A WriterTool that stores docs in memory and restricts disk writes to an allowed root directory. + """ + + is_lion_system_tool = True + system_tool_name = "writer_tool" + + def __init__(self, allowed_root: str): + """ + :param allowed_root: The only directory (and subdirs) where we permit file saving. 
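+
+        A minimal construction sketch (illustrative paths, assuming the
+        process runs from a directory containing ./data)::
+
+            writer = WriterTool(allowed_root="./data")
+            # saving under ./data succeeds; anything outside is rejected
+            writer.handle_request({
+                "action": "save_file",
+                "directory": "data/out",
+                "filename": "notes.txt",
+                "content": "hello",
+            })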
+ """ + super().__init__() + # doc_id -> (temp_file_path, doc_length) + self.documents = {} + + # For restricted disk writes: + self.allowed_root = Path(allowed_root).resolve() + + self._tool = None + + def handle_request(self, request: WriterRequest) -> WriterResponse: + if isinstance(request, dict): + request = WriterRequest(**request) + + action = request.action + + if action == WriterAction.open: + return self._open_doc(request.path) + + elif action == WriterAction.write: + if not request.doc_id: + return WriterResponse( + success=False, error="doc_id is required for 'write'" + ) + if request.content is None: + return WriterResponse( + success=False, error="content is required for 'write'" + ) + return self._write_doc( + request.doc_id, + request.content, + request.start_offset, + request.end_offset, + ) + + elif action == WriterAction.list_docs: + return self._list_docs() + + elif action == WriterAction.close: + if not request.doc_id: + return WriterResponse( + success=False, error="doc_id is required for 'close'" + ) + return self._close_doc(request.doc_id) + + elif action == WriterAction.save_file: + if not request.directory or not request.filename: + return WriterResponse( + success=False, + error="directory and filename are required for 'save_file'", + ) + if request.content is None: + return WriterResponse( + success=False, error="content is required for 'save_file'" + ) + return self._save_file( + text=request.content, + directory=request.directory, + filename=request.filename, + ) + + elif action == WriterAction.save_chunks: + if not request.directory or not request.filename: + return WriterResponse( + success=False, + error="directory and filename are required for 'save_chunks'", + ) + if not request.chunks: + return WriterResponse( + success=False, + error="chunks list is required for 'save_chunks'", + ) + return self._save_chunks( + chunks=request.chunks, + directory=request.directory, + filename=request.filename, + ) + + return WriterResponse(success=False, error="Unknown action type") + + # ------------------------ + # In-memory doc management + # ------------------------ + + def _open_doc(self, path: str | None) -> WriterResponse: + """ + If path is given, read existing file content into memory. + If not found or None, create empty doc. 
+ """ + original_text = "" + if path is not None: + try: + with open(path, encoding="utf-8") as f: + original_text = f.read() + except FileNotFoundError: + pass # treat as empty if not found + + temp_file = tempfile.NamedTemporaryFile( + delete=False, mode="w", encoding="utf-8" + ) + temp_file.write(original_text) + temp_file.close() + + doc_id = f"WRITER_{abs(hash(path if path else temp_file.name))}" + self.documents[doc_id] = (temp_file.name, len(original_text)) + + return WriterResponse( + success=True, + doc_info=WriterDocumentInfo( + doc_id=doc_id, length=len(original_text) + ), + ) + + def _write_doc( + self, doc_id: str, content: str, start: int | None, end: int | None + ) -> WriterResponse: + if doc_id not in self.documents: + return WriterResponse( + success=False, error="doc_id not found in memory" + ) + + path, length = self.documents[doc_id] + try: + with open(path, encoding="utf-8") as f: + old_text = f.read() + except Exception as e: + return WriterResponse(success=False, error=f"Read error: {str(e)}") + + if start is None: + # Append + new_text = old_text + content + else: + s = max(0, start) + if end is None: + e = s + len(content) + else: + e = max(s, min(end, len(old_text))) + new_text = old_text[:s] + content + old_text[e:] + + try: + with open(path, "w", encoding="utf-8") as f: + f.write(new_text) + except Exception as e: + return WriterResponse( + success=False, error=f"Write error: {str(e)}" + ) + + updated_length = len(new_text) + self.documents[doc_id] = (path, updated_length) + return WriterResponse(success=True, updated_length=updated_length) + + def _list_docs(self) -> WriterResponse: + return WriterResponse( + success=True, doc_list=list(self.documents.keys()) + ) + + def _close_doc(self, doc_id: str) -> WriterResponse: + if doc_id not in self.documents: + return WriterResponse( + success=False, error="doc_id not found in memory" + ) + del self.documents[doc_id] + return WriterResponse(success=True) + + # ------------------------ + # Restricted disk writes + # ------------------------ + + def _save_file( + self, text: str, directory: str, filename: str + ) -> WriterResponse: + """ + Save text to a file within the allowed_root only. + """ + dir_path = Path(directory).resolve() + file_path = dir_path / filename + + # Ensure the directory is within allowed_root + if not str(dir_path).startswith(str(self.allowed_root)): + return WriterResponse( + success=False, + error=( + f"Target directory '{dir_path}' is outside allowed root '{self.allowed_root}'" + ), + ) + + try: + dir_path.mkdir(parents=True, exist_ok=True) + with file_path.open("w", encoding="utf-8") as f: + f.write(text) + return WriterResponse(success=True, saved_path=str(file_path)) + except Exception as e: + logging.error(f"Failed saving file: {e}") + return WriterResponse(success=False, error=str(e)) + + def _save_chunks( + self, chunks: list, directory: str, filename: str + ) -> WriterResponse: + """ + Save a list of chunk objects as JSON (or in any format) to a single file, + restricted to allowed_root. 
+ """ + import json + + dir_path = Path(directory).resolve() + file_path = dir_path / filename + + # Check if directory is within allowed_root + if not str(dir_path).startswith(str(self.allowed_root)): + return WriterResponse( + success=False, + error=( + f"Target directory '{dir_path}' is outside allowed root '{self.allowed_root}'" + ), + ) + + try: + dir_path.mkdir(parents=True, exist_ok=True) + with file_path.open("w", encoding="utf-8") as f: + json.dump(chunks, f, ensure_ascii=False, indent=2) + return WriterResponse(success=True, saved_path=str(file_path)) + except Exception as e: + logging.error(f"Failed saving chunks: {e}") + return WriterResponse(success=False, error=str(e)) + + def to_tool(self): + if self._tool is None: + + def writer_tool(**kwargs): + """ + Entrypoint for the WriterTool. Accepts a WriterRequest (JSON). + """ + return self.handle_request( + WriterRequest(**kwargs) + ).model_dump() + + if self.system_tool_name != "writer_tool": + writer_tool.__name__ = self.system_tool_name + + self._tool = Tool( + func_callable=writer_tool, + request_options=WriterRequest, + ) + return self._tool diff --git a/lionagi/version.py b/lionagi/version.py index de77196f4..686e10f8f 100644 --- a/lionagi/version.py +++ b/lionagi/version.py @@ -1 +1 @@ -__version__ = "0.8.6" +__version__ = "0.8.7" diff --git a/notebooks/react.ipynb b/notebooks/react.ipynb index 3203fa3d3..5c3669d78 100644 --- a/notebooks/react.ipynb +++ b/notebooks/react.ipynb @@ -5,6 +5,15 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "# %pip install \"lionagi[tools]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -13,9 +22,24 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'PartialChunk' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlionagi\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Branch, iModel, BaseModel, Field\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlionagi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtypes\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ReaderTool\n", + "File \u001b[0;32m~/lionagi/lionagi/tools/types.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcoder\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CoderTool\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mproviders\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgh_\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GithubTool\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreader\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ReaderTool\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwriter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m WriterTool\n\u001b[1;32m 6\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGithubTool\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCoderTool\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWriterTool\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReaderTool\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 11\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/tools/reader.py:191\u001b[0m\n\u001b[1;32m 187\u001b[0m end: \u001b[38;5;28mint\u001b[39m\n\u001b[1;32m 188\u001b[0m text: \u001b[38;5;28mstr\u001b[39m\n\u001b[0;32m--> 191\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mReaderResponse\u001b[39;00m(BaseModel):\n\u001b[1;32m 192\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;124;03m The response from the 'ReaderTool'.\u001b[39;00m\n\u001b[1;32m 194\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;124;03m - If failure, success=False and error contains details.\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 204\u001b[0m success: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m Field(\n\u001b[1;32m 205\u001b[0m \u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m,\n\u001b[1;32m 206\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIndicates if the requested action was performed successfully.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 207\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/tools/reader.py:214\u001b[0m, in \u001b[0;36mReaderResponse\u001b[0;34m()\u001b[0m\n\u001b[1;32m 208\u001b[0m error: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m Field(\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 210\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDescribes any error that occurred, if success=False.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 211\u001b[0m )\n\u001b[1;32m 213\u001b[0m doc_info: DocumentInfo \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 214\u001b[0m chunk: \u001b[43mPartialChunk\u001b[49m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 215\u001b[0m search_result: SearchResult \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 216\u001b[0m doc_list: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'PartialChunk' is not defined" + ] + } + ], "source": [ "from 
lionagi import Branch, iModel, BaseModel, Field\n", "from lionagi.tools.types import ReaderTool" @@ -23,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -42,15 +66,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "r1 = iModel(\n", + "sonnet = iModel(\n", " provider=\"openrouter\",\n", - " model=\"deepseek/deepseek-r1\",\n", - " max_tokens=8000,\n", - " invoke_with_endpoint=False,\n", + " model=\"anthropic/claude-3.5-sonnet\",\n", + " temperature=0.65,\n", + " top_p=0.9,\n", + ")\n", + "\n", + "haiku = iModel(\n", + " provider=\"openrouter\",\n", + " model=\"anthropic/claude-3.5-haiku\",\n", " temperature=0.65,\n", " top_p=0.9,\n", ")" @@ -58,31 +87,32 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "a = Branch(chat_model=r1, tools=ReaderTool)\n", + "a = Branch(chat_model=sonnet, parse_model=haiku, tools=ReaderTool)\n", "a.connect(\n", " name=\"search_exa\",\n", " provider=\"exa\",\n", " endpoint=\"search\",\n", " queue_capacity=5,\n", " capacity_refresh_time=1,\n", - " description=\"Search the exa database for relevant information\",\n", + " description=\"search with the exa database for relevant information, requires natural language query\",\n", ")\n", "a.connect(\n", " name=\"search_perplexity\",\n", " provider=\"perplexity\",\n", + " model=\"sonar\",\n", " queue_capacity=100,\n", " capacity_refresh_time=60,\n", - " description=\"Search the perplexity database for relevant information\",\n", + " description=\"chat with the perplexity database for relevant information, requires natural language query, perform 'chat/completions'-like interactions\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -90,23 +120,21 @@ "output_type": "stream", "text": [ "Action reader_tool invoked, status: completed.\n", - "Action reader_tool invoked, status: completed.\n", - "Action search_exa invoked, status: completed.\n", "ReAct Round #1 Analysis:\n", " {\n", - " \"analysis\": \"To address the user's request, the immediate priority is to parse and analyze the DeepSeek_R1.pdf paper using the reader_tool. After extracting its content, a structured summary will be generated. Concurrently, recent papers (post-2022) must be retrieved via external search tools to enable comparative analysis. 
The first step involves opening and reading the target PDF, followed by searches for relevant recent works.\",\n", + " \"analysis\": \"To provide a comprehensive analysis of the paper and compare it with other recent works, I need to first read the DeepSeek paper and then search for related recent papers in the same field for comparison.\",\n", " \"planned_actions\": [\n", " {\n", " \"action_type\": \"reader_tool\",\n", - " \"description\": \"Open and read the entire DeepSeek_R1.pdf to extract text for analysis.\"\n", + " \"description\": \"First read the DeepSeek paper to understand its content and contributions\"\n", " },\n", " {\n", " \"action_type\": \"search_exa\",\n", - " \"description\": \"Search for recent LLM papers (post-2022) focusing on training efficiency, architecture innovations, and benchmark results.\"\n", + " \"description\": \"Search for recent related papers in the same field for comparison\"\n", " }\n", " ],\n", " \"extension_needed\": true,\n", - " \"milestone\": \"Extract DeepSeek paper content and identify 3-5 relevant recent papers for comparison.\",\n", + " \"milestone\": \"Read and understand the DeepSeek paper content first\",\n", " \"action_strategy\": \"sequential\",\n", " \"action_batch_size\": null,\n", " \"action_responses\": [\n", @@ -120,105 +148,117 @@ " \"success\": true,\n", " \"error\": null,\n", " \"doc_info\": {\n", - " \"doc_id\": \"DOC_151075259435909708\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", " \"length\": 71908\n", " },\n", - " \"chunk\": null\n", + " \"chunk\": null,\n", + " \"search_result\": null,\n", + " \"doc_list\": null,\n", + " \"chunk_list\": null,\n", + " \"chunks_read\": null\n", " }\n", + " }\n", + " ],\n", + " \"action_required\": true,\n", + " \"action_requests\": [\n", + " {\n", + " \"function\": \"reader_tool\",\n", + " \"arguments\": {\n", + " \"action\": \"open\",\n", + " \"path_or_url\": \"/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf\"\n", + " }\n", + " }\n", + " ],\n", + " \"reason\": {\n", + " \"title\": \"Initial Paper Analysis Plan\",\n", + " \"content\": \"We need to first understand the DeepSeek paper's content before searching for comparisons. Sequential processing is necessary since the search queries will be more effective after understanding the paper's focus.\",\n", + " \"confidence_score\": 0.95\n", + " }\n", + "}\n", + "Action reader_tool invoked, status: failed.\n", + "ReAct Round #2 Analysis:\n", + " {\n", + " \"analysis\": \"Now that the document is opened, I need to read its contents to understand the paper. 
I'll start by chunking the document for easier processing and then read through the chunks systematically.\",\n", + " \"planned_actions\": [\n", + " {\n", + " \"action_type\": \"reader_tool\",\n", + " \"description\": \"Chunk the document into manageable segments\"\n", " },\n", " {\n", + " \"action_type\": \"reader_tool\",\n", + " \"description\": \"Read the first few chunks to understand the paper's introduction and methodology\"\n", + " }\n", + " ],\n", + " \"extension_needed\": true,\n", + " \"milestone\": \"Process and understand the paper's key contents through chunked reading\",\n", + " \"action_strategy\": \"sequential\",\n", + " \"action_batch_size\": null,\n", + " \"action_responses\": [\n", + " {\n", " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", - " \"action\": \"read\",\n", - " \"doc_id\": \"DeepSeek_R1.pdf\",\n", - " \"start_offset\": 0,\n", - " \"end_offset\": {}\n", + " \"action\": \"chunk_doc\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", + " \"chunk_size\": 2000,\n", + " \"overlap\": 0.1,\n", + " \"threshold\": 200\n", " },\n", - " \"output\": {\n", - " \"success\": false,\n", - " \"error\": \"doc_id not found in memory\",\n", - " \"doc_info\": null,\n", - " \"chunk\": null\n", + " \"output\": null\n", + " }\n", + " ],\n", + " \"action_required\": true,\n", + " \"action_requests\": [\n", + " {\n", + " \"function\": \"reader_tool\",\n", + " \"arguments\": {\n", + " \"action\": \"chunk_doc\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", + " \"chunk_size\": 2000,\n", + " \"overlap\": 0.1,\n", + " \"threshold\": 200\n", " }\n", + " }\n", + " ],\n", + " \"reason\": {\n", + " \"title\": \"Document Processing Setup\",\n", + " \"content\": \"The document needs to be chunked first before reading to handle it efficiently. Sequential processing is necessary as we need the chunks created before we can read them.\",\n", + " \"confidence_score\": 0.95\n", + " }\n", + "}\n", + "Action reader_tool invoked, status: completed.\n", + "ReAct Round #3 Analysis:\n", + " {\n", + " \"analysis\": \"Now that the document is opened and chunked, I'll start reading the chunks to understand the paper's content. 
I'll begin with the first few chunks that likely contain the abstract, introduction and methodology sections.\",\n", + " \"planned_actions\": [\n", + " {\n", + " \"action_type\": \"reader_tool\",\n", + " \"description\": \"List available chunks to understand document structure\"\n", " },\n", " {\n", - " \"function\": \"search_exa\",\n", + " \"action_type\": \"reader_tool\",\n", + " \"description\": \"Read first few chunks containing abstract and introduction\"\n", + " }\n", + " ],\n", + " \"extension_needed\": true,\n", + " \"milestone\": \"Understand paper's key contributions and methodology through initial sections\",\n", + " \"action_strategy\": \"sequential\",\n", + " \"action_batch_size\": null,\n", + " \"action_responses\": [\n", + " {\n", + " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", - " \"query\": \"large language model training efficiency innovations post-2022\",\n", - " \"category\": \"research paper\",\n", - " \"numResults\": 5,\n", - " \"startPublishedDate\": \"2023-01-01T00:00:00.000Z\",\n", - " \"contents\": {\n", - " \"text\": {\n", - " \"includeHtmlTags\": false,\n", - " \"maxCharacters\": 5000\n", - " }\n", - " }\n", + " \"action\": \"list_chunks\",\n", + " \"doc_id\": \"DOC_2439136994867403026\"\n", " },\n", " \"output\": {\n", - " \"requestId\": \"cc2e67ed84bb891272f8027411c14ca9\",\n", - " \"resolvedSearchType\": \"neural\",\n", - " \"results\": [\n", - " {\n", - " \"score\": 0.24028775095939636,\n", - " \"title\": \"Experience of Training a 1.7B-Parameter LLaMa Model From Scratch\",\n", - " \"id\": \"https://arxiv.org/abs/2412.13335\",\n", - " \"url\": \"https://arxiv.org/abs/2412.13335\",\n", - " \"publishedDate\": \"2024-12-17T00:00:00.000Z\",\n", - " \"author\": \"[Submitted on 17 Dec 2024]\",\n", - " \"text\": \"View PDF HTML (experimental) Abstract: Pretraining large language models is a complex endeavor influenced by multiple factors, including model architecture, data quality, training continuity, and hardware constraints. In this paper, we share insights gained from the experience of training DMaS-LLaMa-Lite, a fully open source, 1.7-billion-parameter, LLaMa-based model, on approximately 20 billion tokens of carefully curated data. We chronicle the full training trajectory, documenting how evolving validation loss levels and downstream benchmarks reflect transitions from incoherent text to fluent, contextually grounded output. Beyond standard quantitative metrics, we highlight practical considerations such as the importance of restoring optimizer states when resuming from checkpoints, and the impact of hardware changes on training stability and throughput. While qualitative evaluation provides an intuitive understanding of model improvements, our analysis extends to various performance benchmarks, demonstrating how high-quality data and thoughtful scaling enable competitive results with significantly fewer training tokens. By detailing these experiences and offering training logs, checkpoints, and sample outputs, we aim to guide future researchers and practitioners in refining their pretraining strategies. The training script is available on Github at this https URL . The model checkpoints are available on Huggingface at this https URL . Submission history From: Miles Q. 
Li [ view email ] [v1] Tue, 17 Dec 2024 21:15:52 UTC (471 KB)\",\n", - " \"image\": \"/static/browse/0.3.4/images/arxiv-logo-fb.png\",\n", - " \"favicon\": \"https://arxiv.org/static/browse/0.3.4/images/icons/favicon-32x32.png\"\n", - " },\n", - " {\n", - " \"score\": 0.24056276679039001,\n", - " \"title\": \"Computational Bottlenecks of Training Small-scale Large Language Models\",\n", - " \"id\": \"https://arxiv.org/abs/2410.19456\",\n", - " \"url\": \"https://arxiv.org/abs/2410.19456\",\n", - " \"publishedDate\": \"2024-10-25T00:00:00.000Z\",\n", - " \"author\": \"Ashkboos; Saleh; Mirzadeh; Iman; Alizadeh; Keivan; Sekhavat; Mohammad Hossein; Nabi; Moin; Farajtabar; Mehrdad; Faghri; Fartash\",\n", - " \"text\": \"View PDF HTML (experimental) While large language models (LLMs) dominate the AI landscape, Small-scale large Language Models (SLMs) are gaining attention due to cost and efficiency demands from consumers. However, there is limited research on the training behavior and computational requirements of SLMs. In this study, we explore the computational bottlenecks of training SLMs (up to 2B parameters) by examining the effects of various hyperparameters and configurations, including GPU type, batch size, model size, communication protocol, attention type, and the number of GPUs. We assess these factors on popular cloud services using metrics such as loss per dollar and tokens per second. Our findings aim to support the broader adoption and optimization of language model training for low-resource AI research institutes. Submission history From: Saleh Ashkboos [ view email ] [v1] Fri, 25 Oct 2024 10:30:21 UTC (216 KB)\"\n", - " },\n", - " {\n", - " \"score\": 0.23878230154514313,\n", - " \"title\": \"BabyHGRN: Exploring RNNs for Sample-Efficient Training of Language Models\",\n", - " \"id\": \"https://arxiv.org/abs/2412.15978\",\n", - " \"url\": \"https://arxiv.org/abs/2412.15978\",\n", - " \"publishedDate\": \"2024-12-20T00:00:00.000Z\",\n", - " \"author\": \"[Submitted on 20 Dec 2024]\",\n", - " \"text\": \"View PDF HTML (experimental) Abstract: This paper explores the potential of recurrent neural networks (RNNs) and other subquadratic architectures as competitive alternatives to transformer-based models in low-resource language modeling scenarios. We utilize HGRN2 (Qin et al., 2024), a recently proposed RNN-based architecture, and comparatively evaluate its effectiveness against transformer-based baselines and other subquadratic architectures (LSTM, xLSTM, Mamba). Our experimental results show that BABYHGRN, our HGRN2 language model, outperforms transformer-based models in both the 10M and 100M word tracks of the challenge, as measured by their performance on the BLiMP, EWoK, GLUE and BEAR benchmarks. Further, we show the positive impact of knowledge distillation. Our findings challenge the prevailing focus on transformer architectures and indicate the viability of RNN-based models, particularly in resource-constrained environments. 
Submission history From: Patrick Haller [ view email ] [v1] Fri, 20 Dec 2024 15:21:41 UTC (7,567 KB)\",\n", - " \"image\": \"/static/browse/0.3.4/images/arxiv-logo-fb.png\",\n", - " \"favicon\": \"https://arxiv.org/static/browse/0.3.4/images/icons/favicon-32x32.png\"\n", - " },\n", - " {\n", - " \"score\": 0.23888613283634186,\n", - " \"title\": \"Dynamic Subset Tuning: Expanding the Operational Range of Parameter-Efficient Training for Large Language Models\",\n", - " \"id\": \"https://arxiv.org/abs/2411.08610\",\n", - " \"url\": \"https://arxiv.org/abs/2411.08610\",\n", - " \"publishedDate\": \"2024-11-13T00:00:00.000Z\",\n", - " \"author\": \"[Submitted on 13 Nov 2024]\",\n", - " \"text\": \"View PDF HTML (experimental) Abstract: We propose a novel parameter-efficient training (PET) method for large language models that adapts models to downstream tasks by optimizing a small subset of the existing model parameters. Unlike prior methods, this subset is not fixed in location but rather which parameters are modified evolves over the course of training. This dynamic parameter selection can yield good performance with many fewer parameters than extant methods. Our method enables a seamless scaling of the subset size across an arbitrary proportion of the total model size, while popular PET approaches like prompt tuning and LoRA cover only a small part of this spectrum. We match or outperform prompt tuning and LoRA in most cases on a variety of NLP tasks (MT, QA, GSM8K, SuperGLUE) for a given parameter budget across different model families and sizes. Submission history From: Felix Stahlberg [ view email ] [v1] Wed, 13 Nov 2024 13:53:10 UTC (274 KB)\",\n", - " \"image\": \"/static/browse/0.3.4/images/arxiv-logo-fb.png\"\n", - " },\n", - " {\n", - " \"score\": 0.23952573537826538,\n", - " \"title\": \"Training Ultra Long Context Language Model with Fully Pipelined Distributed Transformer\",\n", - " \"id\": \"https://arxiv.org/abs/2408.16978\",\n", - " \"url\": \"https://arxiv.org/abs/2408.16978\",\n", - " \"publishedDate\": \"2024-08-30T00:00:00.000Z\",\n", - " \"author\": \"Yao; Jinghan; Jacobs; Sam Ade; Tanaka; Masahiro; Ruwase; Olatunji; Shafi; Aamir; Subramoni; Hari; Panda; Dhabaleswar K\",\n", - " \"text\": \"View PDF HTML (experimental) Large Language Models (LLMs) with long context capabilities are integral to complex tasks in natural language processing and computational biology, such as text generation and protein sequence analysis. However, training LLMs directly on extremely long contexts demands considerable GPU resources and increased memory, leading to higher costs and greater complexity. Alternative approaches that introduce long context capabilities via downstream finetuning or adaptations impose significant design limitations. In this paper, we propose Fully Pipelined Distributed Transformer (FPDT) for efficiently training long-context LLMs with extreme hardware efficiency. For GPT and Llama models, we achieve a 16x increase in sequence length that can be trained on the same hardware compared to current state-of-the-art solutions. With our dedicated sequence chunk pipeline design, we can now train 8B LLM with 2 million sequence length on only 4 GPUs, while also maintaining over 55% of MFU. Our proposed FPDT is agnostic to existing training techniques and is proven to work efficiently across different LLM models. 
Submission history From: Jinghan Yao [ view email ] [v1] Fri, 30 Aug 2024 02:44:26 UTC (6,761 KB)\"\n", - " }\n", - " ],\n", - " \"effectiveFilters\": {\n", - " \"startPublishedDate\": \"2023-01-01T00:00:00.000Z\",\n", - " \"category\": \"research paper\",\n", - " \"includeDomains\": [],\n", - " \"excludeDomains\": [],\n", - " \"includeText\": [],\n", - " \"excludeText\": [],\n", - " \"urls\": []\n", - " }\n", + " \"success\": true,\n", + " \"error\": null,\n", + " \"doc_info\": null,\n", + " \"chunk\": null,\n", + " \"search_result\": null,\n", + " \"doc_list\": null,\n", + " \"chunk_list\": [],\n", + " \"chunks_read\": null\n", " }\n", " }\n", " ],\n", @@ -227,53 +267,80 @@ " {\n", " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", - " \"action\": \"open\",\n", - " \"path_or_url\": \"/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf\"\n", + " \"action\": \"list_chunks\",\n", + " \"doc_id\": \"DOC_2439136994867403026\"\n", " }\n", - " },\n", + " }\n", + " ],\n", + " \"reason\": {\n", + " \"title\": \"Initial Content Review\",\n", + " \"content\": \"Need to first list chunks and then read initial sections to understand the paper's core contributions before proceeding with deeper analysis and comparisons.\",\n", + " \"confidence_score\": 0.95\n", + " }\n", + "}\n", + "Action reader_tool invoked, status: completed.\n", + "ReAct Round #4 Analysis:\n", + " {\n", + " \"analysis\": \"Since the document has been chunked successfully, I'll start by reading the first chunk to understand the paper's introduction and main contributions.\",\n", + " \"planned_actions\": [\n", + " {\n", + " \"action_type\": \"reader_tool\",\n", + " \"description\": \"Read the first chunk to understand the paper's introduction\"\n", + " }\n", + " ],\n", + " \"extension_needed\": true,\n", + " \"milestone\": \"Extract key information from the paper's introduction\",\n", + " \"action_strategy\": \"sequential\",\n", + " \"action_batch_size\": null,\n", + " \"action_responses\": [\n", " {\n", " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", - " \"action\": \"read\",\n", - " \"doc_id\": \"DeepSeek_R1.pdf\",\n", - " \"start_offset\": 0,\n", - " \"end_offset\": {}\n", + " \"action\": \"read_chunk\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", + " \"chunk_index\": 0\n", + " },\n", + " \"output\": {\n", + " \"success\": false,\n", + " \"error\": \"chunk_index out of range\",\n", + " \"doc_info\": null,\n", + " \"chunk\": null,\n", + " \"search_result\": null,\n", + " \"doc_list\": null,\n", + " \"chunk_list\": null,\n", + " \"chunks_read\": null\n", " }\n", - " },\n", + " }\n", + " ],\n", + " \"action_required\": true,\n", + " \"action_requests\": [\n", " {\n", - " \"function\": \"search_exa\",\n", + " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", - " \"query\": \"large language model training efficiency innovations post-2022\",\n", - " \"category\": \"research paper\",\n", - " \"numResults\": 5,\n", - " \"startPublishedDate\": \"2023-01-01T00:00:00.000Z\",\n", - " \"contents\": {\n", - " \"text\": {\n", - " \"includeHtmlTags\": false,\n", - " \"maxCharacters\": 5000\n", - " }\n", - " }\n", + " \"action\": \"read_chunk\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", + " \"chunk_index\": 0\n", " }\n", " }\n", " ],\n", " \"reason\": {\n", - " \"title\": \"Initial Action Plan\",\n", - " \"content\": \"The DeepSeek paper must be parsed first to identify its core contributions. Parallel searches for recent works will ensure up-to-date comparisons. 
Sequential execution avoids partial data analysis.\",\n", + " \"title\": \"Initial Content Reading\",\n", + " \"content\": \"After successful chunking, we need to start with the first chunk which typically contains the abstract and introduction. This will give us the paper's main contributions and objectives.\",\n", " \"confidence_score\": 0.95\n", " }\n", "}\n", "Action reader_tool invoked, status: completed.\n", - "ReAct Round #2 Analysis:\n", + "ReAct Round #5 Analysis:\n", " {\n", - " \"analysis\": \"The initial attempt to read 'DeepSeek_R1.pdf' failed due to incorrect doc_id reference. The document is now successfully opened (doc_id: DOC_151075259435909708). Recent comparative papers (2023-2024) have been retrieved through search_exa, focusing on LLM training efficiency, architecture innovations, and computational optimization. Immediate next step is to read the full DeepSeek paper using the confirmed doc_id to enable structured analysis.\",\n", + " \"analysis\": \"The previous attempt to read chunk 0 failed. Let me try reading the document directly with specific offsets to understand its content.\",\n", " \"planned_actions\": [\n", " {\n", " \"action_type\": \"reader_tool\",\n", - " \"description\": \"Read full DeepSeek_R1 content using valid doc_id for analysis.\"\n", + " \"description\": \"Read the first portion of the document to understand the introduction\"\n", " }\n", " ],\n", " \"extension_needed\": true,\n", - " \"milestone\": \"Obtain full text of DeepSeek paper to begin thematic analysis\",\n", + " \"milestone\": \"Extract key information from the paper's introduction and methodology\",\n", " \"action_strategy\": \"sequential\",\n", " \"action_batch_size\": null,\n", " \"action_responses\": [\n", @@ -281,9 +348,9 @@ " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", " \"action\": \"read\",\n", - " \"doc_id\": \"DOC_151075259435909708\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", " \"start_offset\": 0,\n", - " \"end_offset\": {}\n", + " \"end_offset\": 4000\n", " },\n", " \"output\": {\n", " \"success\": true,\n", @@ -291,9 +358,13 @@ " \"doc_info\": null,\n", " \"chunk\": {\n", " \"start_offset\": 0,\n", - " \"end_offset\": 71908,\n", - " \"content\": \"\\n\\n## DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning\\n\\nDeepSeek-AI\\n\\nresearch@deepseek.com\\n\\n## Abstract\\n\\nWe introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeekR1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. 
To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama.\\n\\nFigure 1 | Benchmark performance of DeepSeek-R1.\\n\\n## Contents\\n\\n- 1 Introduction\\n  - 1.1 Contributions\\n  - 1.2 Summary of Evaluation Results\\n- 2 Approach\\n  - 2.1 Overview\\n  - 2.2 DeepSeek-R1-Zero: Reinforcement Learning on the Base Model\\n    - 2.2.1 Reinforcement Learning Algorithm\\n    - 2.2.2 Reward Modeling\\n    - 2.2.3 Training Template\\n    - 2.2.4 Performance, Self-evolution Process and Aha Moment of DeepSeek-R1-Zero\\n  - 2.3 DeepSeek-R1: Reinforcement Learning with Cold Start\\n    - 2.3.1 Cold Start\\n    - 2.3.2 Reasoning-oriented Reinforcement Learning\\n    - 2.3.3 Rejection Sampling and Supervised Fine-Tuning\\n    - 2.3.4 Reinforcement Learning for all Scenarios\\n  - 2.4 Distillation: Empower Small Models with Reasoning Capability\\n- 3 Experiment\\n  - 3.1 DeepSeek-R1 Evaluation\\n  - 3.2 Distilled Model Evaluation\\n- 4 Discussion\\n  - 4.1 Distillation v.s. Reinforcement Learning\\n  - 4.2 Unsuccessful Attempts\\n- 5 Conclusion, Limitation, and Future Work\\n- A Contributions and Acknowledgments\\n\\n## 1. Introduction\\n\\nIn recent years, Large Language Models (LLMs) have been undergoing rapid iteration and evolution (Anthropic, 2024; Google, 2024; OpenAI, 2024a), progressively diminishing the gap towards Artificial General Intelligence (AGI).\\n\\nRecently, post-training has emerged as an important component of the full training pipeline. 
It has been shown to enhance accuracy on reasoning tasks, align with social values, and adapt to user preferences, all while requiring relatively minimal computational resources against pre-training. In the context of reasoning capabilities, OpenAI's o1 (OpenAI, 2024b) series models were the first to introduce inference-time scaling by increasing the length of the Chain-of-Thought reasoning process. This approach has achieved significant improvements in various reasoning tasks, such as mathematics, coding, and scientific reasoning. However, the challenge of effective test-time scaling remains an open question for the research community. Several prior works have explored various approaches, including process-based reward models (Lightman et al., 2023; Uesato et al., 2022; Wang et al., 2023), reinforcement learning (Kumar et al., 2024), and search algorithms such as Monte Carlo Tree Search and Beam Search (Feng et al., 2024; Trinh et al., 2024; Xin et al., 2024). However, none of these methods has achieved general reasoning performance comparable to OpenAI's o1 series models.\\n\\nIn this paper, we take the first step toward improving language model reasoning capabilities using pure reinforcement learning (RL). Our goal is to explore the potential of LLMs to develop reasoning capabilities without any supervised data, focusing on their self-evolution through a pure RL process. Specifically, we use DeepSeek-V3-Base as the base model and employ GRPO (Shao et al., 2024) as the RL framework to improve model performance in reasoning. During training, DeepSeek-R1-Zero naturally emerged with numerous powerful and interesting reasoning behaviors. After thousands of RL steps, DeepSeek-R1-Zero exhibits super performance on reasoning benchmarks. For instance, the pass@1 score on AIME 2024 increases from 15.6% to 71.0%, and with majority voting, the score further improves to 86.7%, matching the performance of OpenAI-o1-0912.\\n\\nHowever, DeepSeek-R1-Zero encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates a small amount of cold-start data and a multi-stage training pipeline. Specifically, we begin by collecting thousands of cold-start data to fine-tune the DeepSeek-V3-Base model. Following this, we perform reasoning-oriented RL like DeepSeek-R1-Zero. Upon nearing convergence in the RL process, we create new SFT data through rejection sampling on the RL checkpoint, combined with supervised data from DeepSeek-V3 in domains such as writing, factual QA, and self-cognition, and then retrain the DeepSeek-V3-Base model. After fine-tuning with the new data, the checkpoint undergoes an additional RL process, taking into account prompts from all scenarios. After these steps, we obtained a checkpoint referred to as DeepSeek-R1, which achieves performance on par with OpenAI-o1-1217.\\n\\nWe further explore distillation from DeepSeek-R1 to smaller dense models. Using Qwen2.5-32B (Qwen, 2024b) as the base model, direct distillation from DeepSeek-R1 outperforms applying RL on it. This demonstrates that the reasoning patterns discovered by larger base models are crucial for improving reasoning capabilities. We open-source the distilled Qwen and Llama (Dubey et al., 2024) series. 
Notably, our distilled 14B model outperforms state-of-the-art open-source QwQ-32B-Preview (Qwen, 2024a) by a large margin, and the distilled 32B and 70B models set a new record on the reasoning benchmarks among dense models.\\n\\n## 1.1. Contributions\\n\\n## Post-Training: Large-Scale Reinforcement Learning on the Base Model\\n\\n- · We directly apply reinforcement learning (RL) to the base model without relying on supervised fine-tuning (SFT) as a preliminary step. This approach allows the model to explore chain-of-thought (CoT) for solving complex problems, resulting in the development of DeepSeek-R1-Zero. DeepSeek-R1-Zero demonstrates capabilities such as self-verification, reflection, and generating long CoTs, marking a significant milestone for the research community. Notably, it is the first open research to validate that reasoning capabilities of LLMs can be incentivized purely through RL, without the need for SFT. This breakthrough paves the way for future advancements in this area.\\n- · We introduce our pipeline to develop DeepSeek-R1. The pipeline incorporates two RL stages aimed at discovering improved reasoning patterns and aligning with human preferences, as well as two SFT stages that serve as the seed for the model's reasoning and non-reasoning capabilities. We believe the pipeline will benefit the industry by creating better models.\\n\\n## Distillation: Smaller Models Can Be Powerful Too\\n\\n- · We demonstrate that the reasoning patterns of larger models can be distilled into smaller models, resulting in better performance compared to the reasoning patterns discovered through RL on small models. The open source DeepSeek-R1, as well as its API, will benefit the research community to distill better smaller models in the future.\\n- · Using the reasoning data generated by DeepSeek-R1, we fine-tuned several dense models that are widely used in the research community. The evaluation results demonstrate that the distilled smaller dense models perform exceptionally well on benchmarks. DeepSeek-R1-Distill-Qwen-7B achieves 55.5% on AIME 2024, surpassing QwQ-32B-Preview. Additionally, DeepSeek-R1-Distill-Qwen-32B scores 72.6% on AIME 2024, 94.3% on MATH-500, and 57.2% on LiveCodeBench. These results significantly outperform previous open-source models and are comparable to o1-mini. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community.\\n\\n## 1.2. Summary of Evaluation Results\\n\\n- · Reasoning tasks : (1) DeepSeek-R1 achieves a score of 79.8% Pass@1 on AIME 2024, slightly surpassing OpenAI-o1-1217. On MATH-500, it attains an impressive score of 97.3%, performing on par with OpenAI-o1-1217 and significantly outperforming other models. (2) On coding-related tasks, DeepSeek-R1 demonstrates expert level in code competition tasks, as it achieves 2,029 Elo rating on Codeforces outperforming 96.3% human participants in the competition. For engineering-related tasks, DeepSeek-R1 performs slightly better than DeepSeek-V3, which could help developers in real world tasks.\\n- · Knowledge : On benchmarks such as MMLU, MMLU-Pro, and GPQA Diamond, DeepSeek-R1 achieves outstanding results, significantly outperforming DeepSeek-V3 with scores of 90.8% on MMLU, 84.0% on MMLU-Pro, and 71.5% on GPQA Diamond. While its performance is slightly below that of OpenAI-o1-1217 on these benchmarks, DeepSeek-R1 surpasses other closed-source models, demonstrating its competitive edge in educational tasks. 
On the factual benchmark SimpleQA, DeepSeek-R1 outperforms DeepSeek-V3, demonstrating its capability in handling fact-based queries. A similar trend is observed where OpenAI-o1 surpasses 4o on this benchmark.\\n\\n- · Others : DeepSeek-R1 also excels in a wide range of tasks, including creative writing, general question answering, editing, summarization, and more. It achieves an impressive length-controlled win-rate of 87.6% on AlpacaEval 2.0 and a win-rate of 92.3% on ArenaHard, showcasing its strong ability to intelligently handle non-exam-oriented queries. Additionally, DeepSeek-R1 demonstrates outstanding performance on tasks requiring long-context understanding, substantially outperforming DeepSeek-V3 on long-context benchmarks.\\n\\n## 2. Approach\\n\\n## 2.1. Overview\\n\\nPrevious work has heavily relied on large amounts of supervised data to enhance model performance. In this study, we demonstrate that reasoning capabilities can be significantly improved through large-scale reinforcement learning (RL), even without using supervised fine-tuning (SFT) as a cold start. Furthermore, performance can be further enhanced with the inclusion of a small amount of cold-start data. In the following sections, we present: (1) DeepSeek-R1-Zero, which applies RL directly to the base model without any SFT data, (2) DeepSeek-R1, which applies RL starting from a checkpoint fine-tuned with thousands of long Chain-of-Thought (CoT) examples, and (3) the distillation of reasoning capability from DeepSeek-R1 to small dense models.\\n\\n## 2.2. DeepSeek-R1-Zero: Reinforcement Learning on the Base Model\\n\\nReinforcement learning has demonstrated significant effectiveness in reasoning tasks, as evidenced by our previous works (Shao et al., 2024; Wang et al., 2023). However, these works heavily depended on supervised data, which are time-intensive to gather. In this section, we explore the potential of LLMs to develop reasoning capabilities without any supervised data, focusing on their self-evolution through a pure reinforcement learning process. We start with a brief overview of our reinforcement learning algorithm, followed by the presentation of some exciting results, and hope this provides the community with valuable insights.\\n\\n## 2.2.1. Reinforcement Learning Algorithm\\n\\nGroup Relative Policy Optimization In order to save the training costs of RL, we adopt Group Relative Policy Optimization (GRPO) (Shao et al., 2024), which foregoes the critic model that is typically the same size as the policy model, and estimates the baseline from group scores instead. Specifically, for each question q, GRPO samples a group of outputs {o_1, o_2, · · · , o_G} from the old policy π_θ_old and then optimizes the policy model π_θ by maximizing the following objective:\\n\\nJ_GRPO(θ) = E[ q ∼ P(Q), {o_i}_{i=1}^G ∼ π_θ_old(O | q) ] [ (1/G) ∑_{i=1}^G ( min( (π_θ(o_i | q) / π_θ_old(o_i | q)) A_i, clip( π_θ(o_i | q) / π_θ_old(o_i | q), 1 − ε, 1 + ε ) A_i ) − β D_KL(π_θ ‖ π_ref) ) ]   (1)\\n\\nD_KL(π_θ ‖ π_ref) = π_ref(o_i | q) / π_θ(o_i | q) − log( π_ref(o_i | q) / π_θ(o_i | q) ) − 1   (2)\\n\\nwhere ε and β are hyper-parameters, and A_i is the advantage, computed using a group of rewards {r_1, r_2, . . . , r_G} corresponding to the outputs within each group:\\n\\nA_i = ( r_i − mean({r_1, r_2, · · · , r_G}) ) / std({r_1, r_2, · · · , r_G})   (3)\\n\\nA conversation between User and Assistant. 
The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>. User: prompt. Assistant:\\n\\nTable 1 | Template for DeepSeek-R1-Zero. prompt will be replaced with the specific reasoning question during training.\\n\\n## 2.2.2. Reward Modeling\\n\\nThe reward is the source of the training signal, which decides the optimization direction of RL. To train DeepSeek-R1-Zero, we adopt a rule-based reward system that mainly consists of two types of rewards:\\n\\n- · Accuracy rewards : The accuracy reward model evaluates whether the response is correct. For example, in the case of math problems with deterministic results, the model is required to provide the final answer in a specified format (e.g., within a box), enabling reliable rule-based verification of correctness. Similarly, for LeetCode problems, a compiler can be used to generate feedback based on predefined test cases.\\n- · Format rewards : In addition to the accuracy reward model, we employ a format reward model that enforces the model to put its thinking process between '<think>' and '</think>' tags.\\n\\nWe do not apply the outcome or process neural reward model in developing DeepSeek-R1-Zero, because we find that the neural reward model may suffer from reward hacking in the large-scale reinforcement learning process, and retraining the reward model needs additional training resources and it complicates the whole training pipeline.\\n\\n## 2.2.3. Training Template\\n\\nTo train DeepSeek-R1-Zero, we begin by designing a straightforward template that guides the base model to adhere to our specified instructions. As depicted in Table 1, this template requires DeepSeek-R1-Zero to first produce a reasoning process, followed by the final answer. We intentionally limit our constraints to this structural format, avoiding any content-specific biases, such as mandating reflective reasoning or promoting particular problem-solving strategies, to ensure that we can accurately observe the model's natural progression during the reinforcement learning (RL) process.\\n\\n## 2.2.4. Performance, Self-evolution Process and Aha Moment of DeepSeek-R1-Zero\\n\\nPerformance of DeepSeek-R1-Zero Figure 2 depicts the performance trajectory of DeepSeek-R1-Zero on the AIME 2024 benchmark throughout the reinforcement learning (RL) training process. As illustrated, DeepSeek-R1-Zero demonstrates a steady and consistent enhancement in performance as the RL training advances. Notably, the average pass@1 score on AIME 2024 shows a significant increase, jumping from an initial 15.6% to an impressive 71.0%, reaching performance levels comparable to OpenAI-o1-0912. This significant improvement highlights the efficacy of our RL algorithm in optimizing the model's performance over time.\\n\\nTable 2 provides a comparative analysis between DeepSeek-R1-Zero and OpenAI's o1-0912 models across a variety of reasoning-related benchmarks. 
## 2.2.3. Training Template\\n\\nTo train DeepSeek-R1-Zero, we begin by designing a straightforward template that guides the base model to adhere to our specified instructions. As depicted in Table 1, this template requires DeepSeek-R1-Zero to first produce a reasoning process, followed by the final answer. We intentionally limit our constraints to this structural format, avoiding any content-specific biases (such as mandating reflective reasoning or promoting particular problem-solving strategies) to ensure that we can accurately observe the model's natural progression during the reinforcement learning (RL) process.\\n\\n## 2.2.4. Performance, Self-evolution Process and Aha Moment of DeepSeek-R1-Zero\\n\\nPerformance of DeepSeek-R1-Zero Figure 2 depicts the performance trajectory of DeepSeek-R1-Zero on the AIME 2024 benchmark throughout the reinforcement learning (RL) training process. As illustrated, DeepSeek-R1-Zero demonstrates a steady and consistent enhancement in performance as the RL training advances. Notably, the average pass@1 score on AIME 2024 shows a significant increase, jumping from an initial 15.6% to an impressive 71.0%, reaching performance levels comparable to OpenAI-o1-0912. This significant improvement highlights the efficacy of our RL algorithm in optimizing the model's performance over time.\\n\\nTable 2 provides a comparative analysis between DeepSeek-R1-Zero and OpenAI's o1-0912 models across a variety of reasoning-related benchmarks. The findings reveal that RL empowers DeepSeek-R1-Zero to attain robust reasoning capabilities without the need for any supervised fine-tuning data. This is a noteworthy achievement, as it underscores the model's ability to learn and generalize effectively through RL alone. Additionally, the performance of DeepSeek-R1-Zero can be further augmented through the application of majority voting. For example, when majority voting is employed on the AIME benchmark, DeepSeek-R1-Zero's performance escalates from 71.0% to 86.7%, thereby exceeding the performance of OpenAI-o1-0912. The ability of DeepSeek-R1-Zero to achieve such competitive performance, both with and without majority voting, highlights its strong foundational capabilities and its potential for further advancements in reasoning tasks.\\n\\nTable 2 | Comparison of DeepSeek-R1-Zero and OpenAI o1 models on reasoning-related benchmarks.\\n\\n| Model | AIME 2024 | AIME 2024 | MATH-500 | GPQA Diamond | LiveCode Bench | CodeForces |\\n|---|---|---|---|---|---|---|\\n| | pass@1 | cons@64 | pass@1 | pass@1 | pass@1 | rating |\\n| OpenAI-o1-mini | 63.6 | 80.0 | 90.0 | 60.0 | 53.8 | 1820 |\\n| OpenAI-o1-0912 | 74.4 | 83.3 | 94.8 | 77.3 | 63.4 | 1843 |\\n| DeepSeek-R1-Zero | 71.0 | 86.7 | 95.9 | 73.3 | 50.0 | 1444 |\\n\\nFigure 2 | AIME accuracy of DeepSeek-R1-Zero during training. For each question, we sample 16 responses and calculate the overall average accuracy to ensure a stable evaluation.\\n\\n
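The cons@64 column above is majority voting over 64 sampled answers per question; a minimal sketch of that aggregation (ours; the paper does not specify tie-breaking):\\n\\n```python\\nfrom collections import Counter\\n\\ndef majority_vote(answers: list[str]) -> str:\\n    # Return the most common final answer among the sampled responses;\\n    # ties resolve to the answer that reached the top count first.\\n    return Counter(answers).most_common(1)[0][0]\\n\\nprint(majority_vote(['7', '7', '3', '7']))  # '7'\\n```\\n\\n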
Self-evolution Process of DeepSeek-R1-Zero The self-evolution process of DeepSeek-R1-Zero is a fascinating demonstration of how RL can drive a model to improve its reasoning capabilities autonomously. By initiating RL directly from the base model, we can closely monitor the model's progression without the influence of the supervised fine-tuning stage. This approach provides a clear view of how the model evolves over time, particularly in terms of its ability to handle complex reasoning tasks.\\n\\nAs depicted in Figure 3, the thinking time of DeepSeek-R1-Zero shows consistent improvement throughout the training process. This improvement is not the result of external adjustments but rather an intrinsic development within the model. DeepSeek-R1-Zero naturally acquires the ability to solve increasingly complex reasoning tasks by leveraging extended test-time computation. This computation ranges from generating hundreds to thousands of reasoning tokens, allowing the model to explore and refine its thought processes in greater depth.\\n\\nFigure 3 | The average response length of DeepSeek-R1-Zero on the training set during the RL process. DeepSeek-R1-Zero naturally learns to solve reasoning tasks with more thinking time.\\n\\nOne of the most remarkable aspects of this self-evolution is the emergence of sophisticated behaviors as the test-time computation increases. Behaviors such as reflection, where the model revisits and reevaluates its previous steps, and the exploration of alternative approaches to problem-solving arise spontaneously. These behaviors are not explicitly programmed but instead emerge as a result of the model's interaction with the reinforcement learning environment. This spontaneous development significantly enhances DeepSeek-R1-Zero's reasoning capabilities, enabling it to tackle more challenging tasks with greater efficiency and accuracy.\\n\\nAha Moment of DeepSeek-R1-Zero A particularly intriguing phenomenon observed during the training of DeepSeek-R1-Zero is the occurrence of an 'aha moment'. This moment, as illustrated in Table 3, occurs in an intermediate version of the model. During this phase, DeepSeek-R1-Zero learns to allocate more thinking time to a problem by reevaluating its initial approach. This behavior is not only a testament to the model's growing reasoning abilities but also a captivating example of how reinforcement learning can lead to unexpected and sophisticated outcomes.\\n\\nThis moment is not only an 'aha moment' for the model but also for the researchers observing its behavior. It underscores the power and beauty of reinforcement learning: rather than explicitly teaching the model how to solve a problem, we simply provide it with the right incentives, and it autonomously develops advanced problem-solving strategies. The 'aha moment' serves as a powerful reminder of the potential of RL to unlock new levels of intelligence in artificial systems, paving the way for more autonomous and adaptive models in the future.\\n\\n```\\nQuestion: If $a > 1$, then the sum of the real solutions of $\\sqrt{a - \\sqrt{a + x}} = x$ is equal to\\n\\nResponse: To solve the equation $\\sqrt{a - \\sqrt{a + x}} = x$, let's start by squaring both sides: $(\\sqrt{a - \\sqrt{a + x}})^2 = x^2 \\implies a - \\sqrt{a + x} = x^2$. Rearrange to isolate the inner square root term: $(a - x^2)^2 = a + x \\implies a^2 - 2ax^2 + (x^2)^2 = a + x \\implies x^4 - 2ax^2 - x + (a^2 - a) = 0$ ...\\nWait, wait. Wait. That's an aha moment I can flag here. Let's reevaluate this step-by-step to identify if the correct sum can be ...\\nWe started with the equation: $\\sqrt{a - \\sqrt{a + x}} = x$. First, let's square both sides: $a - \\sqrt{a + x} = x^2 \\implies \\sqrt{a + x} = a - x^2$. Next, I could square both sides again, treating the equation: ...\\n```\\n\\nTable 3 | An interesting 'aha moment' of an intermediate version of DeepSeek-R1-Zero. The model learns to rethink using an anthropomorphic tone. This is also an aha moment for us, allowing us to witness the power and beauty of reinforcement learning.\\n\\nDrawback of DeepSeek-R1-Zero Although DeepSeek-R1-Zero exhibits strong reasoning capabilities and autonomously develops unexpected and powerful reasoning behaviors, it faces several issues. For instance, DeepSeek-R1-Zero struggles with challenges like poor readability and language mixing. To make reasoning processes more readable and share them with the open community, we explore DeepSeek-R1, a method that utilizes RL with human-friendly cold-start data.\\n\\n
## 2.3. DeepSeek-R1: Reinforcement Learning with Cold Start\\n\\nInspired by the promising results of DeepSeek-R1-Zero, two natural questions arise: 1) Can reasoning performance be further improved or convergence accelerated by incorporating a small amount of high-quality data as a cold start? 2) How can we train a user-friendly model that not only produces clear and coherent Chains of Thought (CoT) but also demonstrates strong general capabilities? To address these questions, we design a pipeline to train DeepSeek-R1. The pipeline consists of four stages, outlined as follows.\\n\\n## 2.3.1. Cold Start\\n\\nUnlike DeepSeek-R1-Zero, to prevent the early unstable cold start phase of RL training from the base model, for DeepSeek-R1 we construct and collect a small amount of long CoT data to fine-tune the model as the initial RL actor. To collect such data, we have explored several approaches: using few-shot prompting with a long CoT as an example, directly prompting models to generate detailed answers with reflection and verification, gathering DeepSeek-R1-Zero outputs in a readable format, and refining the results through post-processing by human annotators.\\n\\nIn this work, we collect thousands of cold-start data to fine-tune DeepSeek-V3-Base as the starting point for RL. Compared to DeepSeek-R1-Zero, the advantages of cold-start data include:\\n\\n- · Readability: A key limitation of DeepSeek-R1-Zero is that its content is often not suitable for reading. Responses may mix multiple languages or lack markdown formatting to highlight answers for users. In contrast, when creating cold-start data for DeepSeek-R1, we design a readable pattern that includes a summary at the end of each response and filters out responses that are not reader-friendly. Here, we define the output format as |special_token|<reasoning_process>|special_token|<summary>, where the reasoning process is the CoT for the query, and the summary is used to summarize the reasoning results.\\n- · Potential: By carefully designing the pattern for cold-start data with human priors, we observe better performance than DeepSeek-R1-Zero. We believe iterative training is a better way for reasoning models.\\n\\n## 2.3.2. Reasoning-oriented Reinforcement Learning\\n\\nAfter fine-tuning DeepSeek-V3-Base on the cold-start data, we apply the same large-scale reinforcement learning training process as employed in DeepSeek-R1-Zero. This phase focuses on enhancing the model's reasoning capabilities, particularly in reasoning-intensive tasks such as coding, mathematics, science, and logic reasoning, which involve well-defined problems with clear solutions. During the training process, we observe that CoT often exhibits language mixing, particularly when RL prompts involve multiple languages. To mitigate the issue of language mixing, we introduce a language consistency reward during RL training, which is calculated as the proportion of target-language words in the CoT. Although ablation experiments show that such alignment results in a slight degradation in the model's performance, this reward aligns with human preferences, making the output more readable. Finally, we combine the accuracy of reasoning tasks and the reward for language consistency by directly summing them to form the final reward. We then apply RL training on the fine-tuned model until it achieves convergence on reasoning tasks.\\n\\n
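A minimal sketch of such a language consistency reward (ours; 'is_target_language' is a hypothetical stand-in for whatever word-level language classifier is used, which the paper does not specify):\\n\\n```python\\ndef language_consistency_reward(cot_words: list[str], is_target_language) -> float:\\n    # Proportion of CoT words written in the target language.\\n    if not cot_words:\\n        return 0.0\\n    hits = sum(1 for w in cot_words if is_target_language(w))\\n    return hits / len(cot_words)\\n\\n# Toy predicate: treat pure-ASCII words as 'in the target language'.\\nprint(language_consistency_reward(['First', 'square', 'both', 'sides'],\\n                                  lambda w: w.isascii()))  # 1.0\\n```\\n\\n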
## 2.3.3. Rejection Sampling and Supervised Fine-Tuning\\n\\nWhen reasoning-oriented RL converges, we utilize the resulting checkpoint to collect SFT (Supervised Fine-Tuning) data for the subsequent round. Unlike the initial cold-start data, which primarily focuses on reasoning, this stage incorporates data from other domains to enhance the model's capabilities in writing, role-playing, and other general-purpose tasks. Specifically, we generate the data and fine-tune the model as described below.\\n\\nReasoning data We curate reasoning prompts and generate reasoning trajectories by performing rejection sampling from the checkpoint of the above RL training. In the previous stage, we only included data that could be evaluated using rule-based rewards. However, in this stage, we expand the dataset by incorporating additional data, some of which use a generative reward model by feeding the ground truth and model predictions into DeepSeek-V3 for judgment. Additionally, because the model output is sometimes chaotic and difficult to read, we filter out chains of thought with mixed languages, long paragraphs, and code blocks. For each prompt, we sample multiple responses and retain only the correct ones. In total, we collect about 600k reasoning-related training samples.\\n\\nNon-Reasoning data For non-reasoning data, such as writing, factual QA, self-cognition, and translation, we adopt the DeepSeek-V3 pipeline and reuse portions of the SFT dataset of DeepSeek-V3. For certain non-reasoning tasks, we call DeepSeek-V3 to generate a potential chain of thought before answering the question by prompting. However, for simpler queries, such as 'hello', we do not provide a CoT in response. In the end, we collected a total of approximately 200k training samples that are unrelated to reasoning.\\n\\nWe fine-tune DeepSeek-V3-Base for two epochs using the above curated dataset of about 800k samples.\\n\\n## 2.3.4. Reinforcement Learning for all Scenarios\\n\\nTo further align the model with human preferences, we implement a secondary reinforcement learning stage aimed at improving the model's helpfulness and harmlessness while simultaneously refining its reasoning capabilities. Specifically, we train the model using a combination of reward signals and diverse prompt distributions. For reasoning data, we adhere to the methodology outlined in DeepSeek-R1-Zero, which utilizes rule-based rewards to guide the learning process in math, code, and logical reasoning domains. For general data, we resort to reward models to capture human preferences in complex and nuanced scenarios. We build upon the DeepSeek-V3 pipeline and adopt a similar distribution of preference pairs and training prompts. For helpfulness, we focus exclusively on the final summary, ensuring that the assessment emphasizes the utility and relevance of the response to the user while minimizing interference with the underlying reasoning process. For harmlessness, we evaluate the entire response of the model, including both the reasoning process and the summary, to identify and mitigate any potential risks, biases, or harmful content that may arise during the generation process. Ultimately, the integration of reward signals and diverse data distributions enables us to train a model that excels in reasoning while prioritizing helpfulness and harmlessness.\\n\\n## 2.4. Distillation: Empower Small Models with Reasoning Capability\\n\\nTo equip more efficient smaller models with reasoning capabilities like DeepSeek-R1, we directly fine-tune open-source models like Qwen (Qwen, 2024b) and Llama (AI@Meta, 2024) using the 800k samples curated with DeepSeek-R1, as detailed in §2.3.3. Our findings indicate that this straightforward distillation method significantly enhances the reasoning abilities of smaller models. The base models we use here are Qwen2.5-Math-1.5B, Qwen2.5-Math-7B, Qwen2.5-14B, Qwen2.5-32B, Llama-3.1-8B, and Llama-3.3-70B-Instruct. We select Llama-3.3 because its reasoning capability is slightly better than that of Llama-3.1.\\n\\nFor distilled models, we apply only SFT and do not include an RL stage, even though incorporating RL could substantially boost model performance. Our primary goal here is to demonstrate the effectiveness of the distillation technique, leaving the exploration of the RL stage to the broader research community.\\n\\n
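Putting the rejection sampling of §2.3.3 and the distillation recipe together, the data-collection loop can be pictured with a short sketch (ours; 'generate', 'is_correct', and 'is_readable' are hypothetical stand-ins for the RL checkpoint's sampler, the rule-based or DeepSeek-V3 judge, and the mixed-language/length filters):\\n\\n```python\\ndef collect_sft_data(prompts, generate, is_correct, is_readable, k=16):\\n    # For each prompt, sample k candidate trajectories and keep only those\\n    # that are both correct and reader-friendly; the survivors become SFT data.\\n    dataset = []\\n    for prompt in prompts:\\n        for response in generate(prompt, n=k):\\n            if is_correct(prompt, response) and is_readable(response):\\n                dataset.append({'prompt': prompt, 'response': response})\\n    return dataset\\n```\\n\\n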
## 3. Experiment\\n\\nBenchmarks We evaluate models on MMLU (Hendrycks et al., 2020), MMLU-Redux (Gema et al., 2024), MMLU-Pro (Wang et al., 2024), C-Eval (Huang et al., 2023), CMMLU (Li et al., 2023), IFEval (Zhou et al., 2023), FRAMES (Krishna et al., 2024), GPQA Diamond (Rein et al., 2023), SimpleQA (OpenAI, 2024c), C-SimpleQA (He et al., 2024), SWE-Bench Verified (OpenAI, 2024d), Aider, LiveCodeBench (Jain et al., 2024) (2024-08 to 2025-01), Codeforces, the Chinese National High School Mathematics Olympiad (CNMO 2024), and the American Invitational Mathematics Examination 2024 (AIME 2024) (MAA, 2024). In addition to standard benchmarks, we also evaluate our models on open-ended generation tasks using LLMs as judges. Specifically, we adhere to the original configurations of AlpacaEval 2.0 (Dubois et al., 2024) and Arena-Hard (Li et al., 2024), which leverage GPT-4-Turbo-1106 as the judge for pairwise comparisons. Here, we feed only the final summary to evaluation to avoid length bias. For distilled models, we report representative results on AIME 2024, MATH-500, GPQA Diamond, Codeforces, and LiveCodeBench.\\n\\nEvaluation Prompts Following the setup in DeepSeek-V3, standard benchmarks such as MMLU, DROP, GPQA Diamond, and SimpleQA are evaluated using prompts from the simple-evals framework. For MMLU-Redux, we adopt the Zero-Eval prompt format (Lin, 2024) in a zero-shot setting. For MMLU-Pro, C-Eval, and CLUE-WSC, since the original prompts are few-shot, we slightly modify the prompt to the zero-shot setting, as the CoT in few-shot may hurt the performance of DeepSeek-R1. Other datasets follow their original evaluation protocols with default prompts provided by their creators. For code and math benchmarks, the HumanEval-Mul dataset covers eight mainstream programming languages (Python, Java, C++, C#, JavaScript, TypeScript, PHP, and Bash). Model performance on LiveCodeBench is evaluated using CoT format, with data collected between August 2024 and January 2025. The Codeforces dataset is evaluated using problems from 10 Div.2 contests along with expert-crafted test cases, after which the expected ratings and percentages of competitors are calculated. SWE-Bench Verified results are obtained via the agentless framework (Xia et al., 2024). Aider-related benchmarks are measured using a \\\"diff\\\" format. DeepSeek-R1 outputs are capped at a maximum of 32,768 tokens for each benchmark.\\n\\nBaselines We conduct comprehensive evaluations against several strong baselines, including DeepSeek-V3, Claude-Sonnet-3.5-1022, GPT-4o-0513, OpenAI-o1-mini, and OpenAI-o1-1217. Since accessing the OpenAI-o1-1217 API is challenging in mainland China, we report its performance based on official reports. For distilled models, we also compare against the open-source model QwQ-32B-Preview (Qwen, 2024a).\\n\\nGeneration Setup For all our models, the maximum generation length is set to 32,768 tokens. For benchmarks requiring sampling, we use a temperature of 0.6, a top-p value of 0.95, and generate 64 responses per query to estimate pass@1.\\n\\n
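Under this setup, pass@1 is simply the mean correctness over the k sampled responses per query (k = 64 here); a minimal sketch (ours):\\n\\n```python\\ndef pass_at_1(correct_flags: list[bool]) -> float:\\n    # Average correctness across the k sampled responses for one query.\\n    return sum(correct_flags) / len(correct_flags)\\n\\nprint(pass_at_1([True, False, True, True]))  # 0.75\\n```\\n\\n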
## 3.1. DeepSeek-R1 Evaluation\\n\\nFor education-oriented knowledge benchmarks such as MMLU, MMLU-Pro, and GPQA Diamond, DeepSeek-R1 demonstrates superior performance compared to DeepSeek-V3. This improvement is primarily attributed to enhanced accuracy in STEM-related questions, where significant gains are achieved through large-scale reinforcement learning (RL). Additionally, DeepSeek-R1 excels on FRAMES, a long-context-dependent QA task, showcasing its strong document analysis capabilities. This highlights the potential of reasoning models in AI-driven search and data analysis tasks.\\n\\nTable 4 | Comparison between DeepSeek-R1 and other representative models.\\n\\n| Benchmark (Metric) | Claude-3.5-Sonnet-1022 | GPT-4o-0513 | DeepSeek-V3 | OpenAI-o1-mini | OpenAI-o1-1217 | DeepSeek-R1 |\\n|---|---|---|---|---|---|---|\\n| Architecture | - | - | MoE | - | - | MoE |\\n| # Activated Params | - | - | 37B | - | - | 37B |\\n| # Total Params | - | - | 671B | - | - | 671B |\\n| MMLU (Pass@1) | 88.3 | 87.2 | 88.5 | 85.2 | 91.8 | 90.8 |\\n| MMLU-Redux (EM) | 88.9 | 88.0 | 89.1 | 86.7 | - | 92.9 |\\n| MMLU-Pro (EM) | 78.0 | 72.6 | 75.9 | 80.3 | - | 84.0 |\\n| DROP (3-shot F1) | 88.3 | 83.7 | 91.6 | 83.9 | 90.2 | 92.2 |\\n| IF-Eval (Prompt Strict) | 86.5 | 84.3 | 86.1 | 84.8 | - | 83.3 |\\n| GPQA Diamond (Pass@1) | 65.0 | 49.9 | 59.1 | 60.0 | 75.7 | 71.5 |\\n| SimpleQA (Correct) | 28.4 | 38.2 | 24.9 | 7.0 | 47.0 | 30.1 |\\n| FRAMES (Acc.) | 72.5 | 80.5 | 73.3 | 76.9 | - | 82.5 |\\n| AlpacaEval 2.0 (LC-winrate) | 52.0 | 51.1 | 70.0 | 57.8 | - | 87.6 |\\n| ArenaHard (GPT-4-1106) | 85.2 | 80.4 | 85.5 | 92.0 | - | 92.3 |\\n| LiveCodeBench (Pass@1-COT) | 38.9 | 32.9 | 36.2 | 53.8 | 63.4 | 65.9 |\\n| Codeforces (Percentile) | 20.3 | 23.6 | 58.7 | 93.4 | 96.6 | 96.3 |\\n| Codeforces (Rating) | 717 | 759 | 1134 | 1820 | 2061 | 2029 |\\n| SWE Verified (Resolved) | 50.8 | 38.8 | 42.0 | 41.6 | 48.9 | 49.2 |\\n| Aider-Polyglot (Acc.) | 45.3 | 16.0 | 49.6 | 32.9 | 61.7 | 53.3 |\\n| AIME 2024 (Pass@1) | 16.0 | 9.3 | 39.2 | 63.6 | 79.2 | 79.8 |\\n| MATH-500 (Pass@1) | 78.3 | 74.6 | 90.2 | 90.0 | 96.4 | 97.3 |\\n| CNMO 2024 (Pass@1) | 13.1 | 10.8 | 43.2 | 67.6 | - | 78.8 |\\n| CLUEWSC (EM) | 85.4 | 87.9 | 90.9 | 89.9 | - | 92.8 |\\n| C-Eval (EM) | 76.7 | 76.0 | 86.5 | 68.9 | - | 91.8 |\\n| C-SimpleQA (Correct) | 55.4 | 58.7 | 68.0 | 40.3 | - | 63.7 |\\n\\nOn the factual benchmark SimpleQA, DeepSeek-R1 outperforms DeepSeek-V3, demonstrating its capability in handling fact-based queries. A similar trend is observed where OpenAI-o1 surpasses GPT-4o on this benchmark. However, DeepSeek-R1 performs worse than DeepSeek-V3 on the Chinese SimpleQA benchmark, primarily due to its tendency to refuse answering certain queries after safety RL. Without safety RL, DeepSeek-R1 could achieve an accuracy of over 70%.\\n\\nDeepSeek-R1 also delivers impressive results on IF-Eval, a benchmark designed to assess a model's ability to follow format instructions. These improvements can be linked to the inclusion of instruction-following data during the final stages of supervised fine-tuning (SFT) and RL training. Furthermore, remarkable performance is observed on AlpacaEval 2.0 and ArenaHard, indicating DeepSeek-R1's strengths in writing tasks and open-domain question answering. Its significant outperformance of DeepSeek-V3 underscores the generalization benefits of large-scale RL, which not only boosts reasoning capabilities but also improves performance across diverse domains.\\n\\n
Moreover, the summary lengths generated by DeepSeek-R1 are concise, with an average of 689 tokens on ArenaHard and 2,218 characters on AlpacaEval 2.0. This indicates that DeepSeek-R1 avoids introducing length bias during GPT-based evaluations, further solidifying its robustness across multiple tasks.\\n\\nOn math tasks, DeepSeek-R1 demonstrates performance on par with OpenAI-o1-1217, surpassing other models by a large margin. A similar trend is observed on coding algorithm tasks, such as LiveCodeBench and Codeforces, where reasoning-focused models dominate these benchmarks. On engineering-oriented coding tasks, OpenAI-o1-1217 outperforms DeepSeek-R1 on Aider but achieves comparable performance on SWE Verified. We believe the engineering performance of DeepSeek-R1 will improve in the next version, as the amount of related RL training data currently remains very limited.\\n\\n## 3.2. Distilled Model Evaluation\\n\\nTable 5 | Comparison of DeepSeek-R1 distilled models and other comparable models on reasoning-related benchmarks.\\n\\n| Model | AIME 2024 | AIME 2024 | MATH-500 | GPQA Diamond | LiveCode Bench | CodeForces |\\n|---|---|---|---|---|---|---|\\n| | pass@1 | cons@64 | pass@1 | pass@1 | pass@1 | rating |\\n| GPT-4o-0513 | 9.3 | 13.4 | 74.6 | 49.9 | 32.9 | 759 |\\n| Claude-3.5-Sonnet-1022 | 16.0 | 26.7 | 78.3 | 65.0 | 38.9 | 717 |\\n| OpenAI-o1-mini | 63.6 | 80.0 | 90.0 | 60.0 | 53.8 | 1820 |\\n| QwQ-32B-Preview | 50.0 | 60.0 | 90.6 | 54.5 | 41.9 | 1316 |\\n| DeepSeek-R1-Distill-Qwen-1.5B | 28.9 | 52.7 | 83.9 | 33.8 | 16.9 | 954 |\\n| DeepSeek-R1-Distill-Qwen-7B | 55.5 | 83.3 | 92.8 | 49.1 | 37.6 | 1189 |\\n| DeepSeek-R1-Distill-Qwen-14B | 69.7 | 80.0 | 93.9 | 59.1 | 53.1 | 1481 |\\n| DeepSeek-R1-Distill-Qwen-32B | 72.6 | 83.3 | 94.3 | 62.1 | 57.2 | 1691 |\\n| DeepSeek-R1-Distill-Llama-8B | 50.4 | 80.0 | 89.1 | 49.0 | 39.6 | 1205 |\\n| DeepSeek-R1-Distill-Llama-70B | 70.0 | 86.7 | 94.5 | 65.2 | 57.5 | 1633 |\\n\\nAs shown in Table 5, simply distilling DeepSeek-R1's outputs enables the efficient DeepSeek-R1-7B (i.e., DeepSeek-R1-Distill-Qwen-7B, abbreviated similarly below) to outperform non-reasoning models like GPT-4o-0513 across the board. DeepSeek-R1-14B surpasses QwQ-32B-Preview on all evaluation metrics, while DeepSeek-R1-32B and DeepSeek-R1-70B significantly exceed o1-mini on most benchmarks. These results demonstrate the strong potential of distillation. Additionally, we found that applying RL to these distilled models yields significant further gains. We believe this warrants further exploration and therefore present only the results of the simple SFT-distilled models here.\\n\\n## 4. Discussion\\n\\n## 4.1. Distillation vs. Reinforcement Learning\\n\\nTable 6 | Comparison of distilled and RL models on reasoning-related benchmarks.\\n\\n| Model | AIME 2024 | AIME 2024 | MATH-500 | GPQA Diamond | LiveCodeBench |\\n|---|---|---|---|---|---|\\n| | pass@1 | cons@64 | pass@1 | pass@1 | pass@1 |\\n| QwQ-32B-Preview | 50.0 | 60.0 | 90.6 | 54.5 | 41.9 |\\n| DeepSeek-R1-Zero-Qwen-32B | 47.0 | 60.0 | 91.6 | 55.0 | 40.2 |\\n| DeepSeek-R1-Distill-Qwen-32B | 72.6 | 83.3 | 94.3 | 62.1 | 57.2 |\\n\\nIn Section 3.2, we can see that by distilling DeepSeek-R1, the small model can achieve impressive results. 
However, one question remains: can the model achieve comparable performance through the large-scale RL training discussed in this paper, without distillation?\\n\\nTo answer this question, we conduct large-scale RL training on Qwen-32B-Base using math, code, and STEM data, training for over 10K steps, resulting in DeepSeek-R1-Zero-Qwen-32B. The experimental results, shown in Table 6, demonstrate that the 32B base model, after large-scale RL training, achieves performance on par with QwQ-32B-Preview. However, DeepSeek-R1-Distill-Qwen-32B, which is distilled from DeepSeek-R1, performs significantly better than DeepSeek-R1-Zero-Qwen-32B across all benchmarks. Therefore, we can draw two conclusions: First, distilling more powerful models into smaller ones yields excellent results, whereas smaller models relying on the large-scale RL mentioned in this paper require enormous computational power and may not even achieve the performance of distillation. Second, while distillation strategies are both economical and effective, advancing beyond the boundaries of intelligence may still require more powerful base models and larger-scale reinforcement learning.\\n\\n## 4.2. Unsuccessful Attempts\\n\\nIn the early stages of developing DeepSeek-R1, we also encountered failures and setbacks along the way. We share our failure experiences here to provide insights, but this does not imply that these approaches are incapable of developing effective reasoning models.\\n\\nProcess Reward Model (PRM) PRM is a reasonable method to guide the model toward better approaches for solving reasoning tasks (Lightman et al., 2023; Uesato et al., 2022; Wang et al., 2023). However, in practice, PRM has three main limitations that may hinder its ultimate success. First, it is challenging to explicitly define a fine-grained step in general reasoning. Second, determining whether the current intermediate step is correct is a challenging task. Automated annotation using models may not yield satisfactory results, while manual annotation is not conducive to scaling up. Third, once a model-based PRM is introduced, it inevitably leads to reward hacking (Gao et al., 2022), and retraining the reward model needs additional training resources and complicates the whole training pipeline. In conclusion, while PRM demonstrates a good ability to rerank the top-N responses generated by the model or to assist in guided search (Snell et al., 2024), its advantages are limited compared to the additional computational overhead it introduces during the large-scale reinforcement learning process in our experiments.\\n\\nMonte Carlo Tree Search (MCTS) Inspired by AlphaGo (Silver et al., 2017b) and AlphaZero (Silver et al., 2017a), we explored using Monte Carlo Tree Search (MCTS) to enhance test-time compute scalability. This approach involves breaking answers into smaller parts to allow the model to explore the solution space systematically. To facilitate this, we prompt the model to generate multiple tags that correspond to specific reasoning steps necessary for the search. For training, we first use collected prompts to find answers via MCTS guided by a pre-trained value model. Subsequently, we use the resulting question-answer pairs to train both the actor model and the value model, iteratively refining the process.\\n\\nHowever, this approach encounters several challenges when scaling up the training. First, unlike chess, where the search space is relatively well-defined, token generation presents an exponentially larger search space. 
To address this, we set a maximum extension limit for each node, but this can lead to the model getting stuck in local optima. Second, the value model directly influences the quality of generation since it guides each step of the search process. Training a fine-grained value model is inherently difficult, which makes it challenging for the model to iteratively improve. While AlphaGo's core success relied on training a value model to progressively enhance its performance, this principle proves difficult to replicate in our setup due to the complexities of token generation.\\n\\nIn conclusion, while MCTS can improve performance during inference when paired with a pre-trained value model, iteratively boosting model performance through self-search remains a significant challenge.\\n\\n## 5. Conclusion, Limitation, and Future Work\\n\\nIn this work, we share our journey in enhancing model reasoning abilities through reinforcement learning (RL). DeepSeek-R1-Zero represents a pure RL approach without relying on cold-start data, achieving strong performance across various tasks. DeepSeek-R1 is more powerful, leveraging cold-start data alongside iterative RL fine-tuning. Ultimately, DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on a range of tasks.\\n\\nWe further explore distilling the reasoning capability to small dense models. We use DeepSeek-R1 as the teacher model to generate 800K samples, and fine-tune several small dense models. The results are promising: DeepSeek-R1-Distill-Qwen-1.5B outperforms GPT-4o and Claude-3.5-Sonnet on math benchmarks with 28.9% on AIME and 83.9% on MATH. Other dense models also achieve impressive results, significantly outperforming other instruction-tuned models based on the same underlying checkpoints.\\n\\nIn the future, we plan to invest in research across the following directions for DeepSeek-R1.\\n\\n- · General Capability: Currently, the capabilities of DeepSeek-R1 fall short of DeepSeek-V3 in tasks such as function calling, multi-turn conversation, complex role-playing, and JSON output. Moving forward, we plan to explore how long CoT can be leveraged to enhance tasks in these fields.\\n- · Language Mixing: DeepSeek-R1 is currently optimized for Chinese and English, which may result in language mixing issues when handling queries in other languages. For instance, DeepSeek-R1 might use English for reasoning and responses, even if the query is in a language other than English or Chinese. We aim to address this limitation in future updates.\\n- · Prompt Engineering: When evaluating DeepSeek-R1, we observe that it is sensitive to prompts. Few-shot prompting consistently degrades its performance. Therefore, we recommend users directly describe the problem and specify the output format using a zero-shot setting for optimal results.\\n- · Software Engineering Tasks: Due to the long evaluation times, which impact the efficiency of the RL process, large-scale RL has not been applied extensively in software engineering tasks. As a result, DeepSeek-R1 has not demonstrated a huge improvement over DeepSeek-V3 on software engineering benchmarks. Future versions will address this by implementing rejection sampling on software engineering data or incorporating asynchronous evaluations during the RL process to improve efficiency.\\n\\n## References\\n\\nAI@Meta. Llama 3.1 model card, 2024. URL https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md .\\n\\nAnthropic. Claude 3.5 sonnet, 2024. 
URL https://www.anthropic.com/news/claude-3-5-sonnet .\\n\\nA. Dubey, A. Jauhri, A. Pandey, A. Kadian, A. Al-Dahle, A. Letman, A. Mathur, A. Schelten, A. Yang, A. Fan, et al. The llama 3 herd of models. arXiv preprint arXiv:2407.21783, 2024.\\n\\nY. Dubois, B. Galambosi, P. Liang, and T. B. Hashimoto. Length-controlled alpacaeval: A simple way to debias automatic evaluators. arXiv preprint arXiv:2404.04475, 2024.\\n\\nX. Feng, Z. Wan, M. Wen, S. M. McAleer, Y. Wen, W. Zhang, and J. Wang. Alphazero-like tree-search can guide large language model decoding and training, 2024. URL https://arxiv.org/abs/2309.17179 .\\n\\nL. Gao, J. Schulman, and J. Hilton. Scaling laws for reward model overoptimization, 2022. URL https://arxiv.org/abs/2210.10760 .\\n\\nA. P. Gema, J. O. J. Leang, G. Hong, A. Devoto, A. C. M. Mancino, R. Saxena, X. He, Y. Zhao, X. Du, M. R. G. Madani, C. Barale, R. McHardy, J. Harris, J. Kaddour, E. van Krieken, and P. Minervini. Are we done with mmlu? CoRR, abs/2406.04127, 2024. URL https://doi.org/10.48550/arXiv.2406.04127 .\\n\\nGoogle. Our next-generation model: Gemini 1.5, 2024. URL https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024 .\\n\\nY. He, S. Li, J. Liu, Y. Tan, W. Wang, H. Huang, X. Bu, H. Guo, C. Hu, B. Zheng, et al. Chinese simpleqa: A chinese factuality evaluation for large language models. arXiv preprint arXiv:2411.07140, 2024.\\n\\nD. Hendrycks, C. Burns, S. Basart, A. Zou, M. Mazeika, D. Song, and J. Steinhardt. Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300, 2020.\\n\\nY. Huang, Y. Bai, Z. Zhu, J. Zhang, J. Zhang, T. Su, J. Liu, C. Lv, Y. Zhang, J. Lei, et al. C-Eval: A multi-level multi-discipline chinese evaluation suite for foundation models. arXiv preprint arXiv:2305.08322, 2023.\\n\\nN. Jain, K. Han, A. Gu, W. Li, F. Yan, T. Zhang, S. Wang, A. Solar-Lezama, K. Sen, and I. Stoica. Livecodebench: Holistic and contamination free evaluation of large language models for code. CoRR, abs/2403.07974, 2024. URL https://doi.org/10.48550/arXiv.2403.07974 .\\n\\nS. Krishna, K. Krishna, A. Mohananey, S. Schwarcz, A. Stambler, S. Upadhyay, and M. Faruqui. Fact, fetch, and reason: A unified evaluation of retrieval-augmented generation. CoRR, abs/2409.12941, 2024. doi: 10.48550/ARXIV.2409.12941. URL https://doi.org/10.48550/arXiv.2409.12941 .\\n\\nA. Kumar, V. Zhuang, R. Agarwal, Y. Su, J. D. Co-Reyes, A. Singh, K. Baumli, S. Iqbal, C. Bishop, R. Roelofs, et al. Training language models to self-correct via reinforcement learning. arXiv preprint arXiv:2409.12917, 2024.\\n\\nH. Li, Y. Zhang, F. Koto, Y. Yang, H. Zhao, Y. Gong, N. Duan, and T. Baldwin. CMMLU: Measuring massive multitask language understanding in Chinese. arXiv preprint arXiv:2306.09212, 2023.\\n\\nT. Li, W.-L. Chiang, E. Frick, L. Dunlap, T. Wu, B. Zhu, J. E. Gonzalez, and I. Stoica. From crowdsourced data to high-quality benchmarks: Arena-hard and benchbuilder pipeline. arXiv preprint arXiv:2406.11939, 2024.\\n\\nH. Lightman, V. Kosaraju, Y. Burda, H. Edwards, B. Baker, T. Lee, J. Leike, J. Schulman, I. Sutskever, and K. Cobbe. Let's verify step by step. arXiv preprint arXiv:2305.20050, 2023.\\n\\nB. Y. Lin. ZeroEval: A Unified Framework for Evaluating Language Models, July 2024. 
URL https://github.com/WildEval/ZeroEval .\\n\\nMAA. American invitational mathematics examination - AIME 2024, February 2024. URL https://maa.org/math-competitions/american-invitational-mathematics-examination-aime .\\n\\nOpenAI. Hello GPT-4o, 2024a. URL https://openai.com/index/hello-gpt-4o/ .\\n\\nOpenAI. Learning to reason with llms, 2024b. URL https://openai.com/index/learning-to-reason-with-llms/ .\\n\\nOpenAI. Introducing SimpleQA, 2024c. URL https://openai.com/index/introducing-simpleqa/ .\\n\\nOpenAI. Introducing SWE-bench Verified: we're releasing a human-validated subset of SWE-bench, 2024d. URL https://openai.com/index/introducing-swe-bench-verified/ .\\n\\nQwen. Qwq: Reflect deeply on the boundaries of the unknown, 2024a. URL https://qwenlm.github.io/blog/qwq-32b-preview/ .\\n\\nQwen. Qwen2.5: A party of foundation models, 2024b. URL https://qwenlm.github.io/blog/qwen2.5/ .\\n\\nD. Rein, B. L. Hou, A. C. Stickland, J. Petty, R. Y. Pang, J. Dirani, J. Michael, and S. R. Bowman. GPQA: A graduate-level google-proof q&a benchmark. arXiv preprint arXiv:2311.12022, 2023.\\n\\nZ. Shao, P. Wang, Q. Zhu, R. Xu, J. Song, M. Zhang, Y. Li, Y. Wu, and D. Guo. Deepseekmath: Pushing the limits of mathematical reasoning in open language models. arXiv preprint arXiv:2402.03300, 2024.\\n\\nD. Silver, T. Hubert, J. Schrittwieser, I. Antonoglou, M. Lai, A. Guez, M. Lanctot, L. Sifre, D. Kumaran, T. Graepel, T. P. Lillicrap, K. Simonyan, and D. Hassabis. Mastering chess and shogi by self-play with a general reinforcement learning algorithm, 2017a. URL https://arxiv.org/abs/1712.01815 .\\n\\nD. Silver, J. Schrittwieser, K. Simonyan, I. Antonoglou, A. Huang, A. Guez, T. Hubert, L. Baker, M. Lai, A. Bolton, Y. Chen, T. P. Lillicrap, F. Hui, L. Sifre, G. van den Driessche, T. Graepel, and D. Hassabis. Mastering the game of go without human knowledge. Nat., 550(7676):354-359, 2017b. doi: 10.1038/NATURE24270. URL https://doi.org/10.1038/nature24270 .\\n\\nC. Snell, J. Lee, K. Xu, and A. Kumar. Scaling llm test-time compute optimally can be more effective than scaling model parameters, 2024. URL https://arxiv.org/abs/2408.03314 .\\n\\nT. Trinh, Y. Wu, Q. Le, H. He, and T. Luong. Solving olympiad geometry without human demonstrations. Nature, 2024. doi: 10.1038/s41586-023-06747-5.\\n\\nJ. Uesato, N. Kushman, R. Kumar, F. Song, N. Siegel, L. Wang, A. Creswell, G. Irving, and I. Higgins. Solving math word problems with process- and outcome-based feedback. arXiv preprint arXiv:2211.14275, 2022.\\n\\nP. Wang, L. Li, Z. Shao, R. Xu, D. Dai, Y. Li, D. Chen, Y. Wu, and Z. Sui. Math-shepherd: A label-free step-by-step verifier for llms in mathematical reasoning. arXiv preprint arXiv:2312.08935, 2023.\\n\\nY. Wang, X. Ma, G. Zhang, Y. Ni, A. Chandra, S. Guo, W. Ren, A. Arulraj, X. He, Z. Jiang, T. Li, M. Ku, K. Wang, A. Zhuang, R. Fan, X. Yue, and W. Chen. Mmlu-pro: A more robust and challenging multi-task language understanding benchmark. CoRR, abs/2406.01574, 2024. URL https://doi.org/10.48550/arXiv.2406.01574 .\\n\\nC. S. Xia, Y. Deng, S. Dunn, and L. Zhang. Agentless: Demystifying llm-based software engineering agents. arXiv preprint, 2024.\\n\\nH. Xin, Z. Z. Ren, J. Song, Z. Shao, W. Zhao, H. Wang, B. Liu, L. Zhang, X. Lu, Q. Du, W. Gao, Q. Zhu, D. Yang, Z. Gou, Z. F. Wu, F. Luo, and C. Ruan. 
Deepseek-prover-v1.5: Harnessing proof assistant feedback for reinforcement learning and monte-carlo tree search, 2024. URL https://arxiv.org/abs/2408.08152 .\\n- J. Zhou, T. Lu, S. Mishra, S. Brahma, S. Basu, Y. Luan, D. Zhou, and L. Hou. Instruction-following evaluation for large language models. arXiv preprint arXiv:2311.07911, 2023.\\n\\n## Appendix\\n\\n## A. Contributions and Acknowledgments\\n\\n| Core Contributors | Hui Li |\\n|---------------------------|-----------------|\\n| Daya Guo | Jianzhong Guo |\\n| Dejian Yang | Jiashi Li |\\n| Haowei Zhang | Jingchang Chen |\\n| Junxiao Song | Jingyang Yuan |\\n| Ruoyu Zhang | Jinhao Tu |\\n| Runxin Xu | Junjie Qiu |\\n| Qihao Zhu | Junlong Li |\\n| Shirong Ma | J.L. Cai |\\n| Peiyi Wang | Jiaqi Ni |\\n| Xiao Bi | Jian Liang |\\n| Xiaokang Zhang | Jin Chen |\\n| Xingkai Yu | Kai Dong |\\n| Yu Wu | Kai Hu* |\\n| Z.F. Wu | Kaichao You |\\n| Zhibin Gou | Kaige Gao |\\n| Zhihong Shao | Kang Guan |\\n| Zhuoshu Li | Kexin Huang |\\n| Ziyi Gao | Kuai Yu |\\n| | Lean Wang |\\n| | Lecong Zhang |\\n| Contributors Aixin Liu | Liang Zhao |\\n| Bing Xue | Litong Wang |\\n| Bingxuan Wang | Liyue Zhang |\\n| Bochao Wu | Lei Xu |\\n| Bei Feng | Leyi Xia |\\n| | Mingchuan Zhang |\\n| Chengda Lu Chenggang Zhao | Minghua Zhang |\\n| Chengqi Deng | Minghui Tang |\\n| Chong Ruan | Mingxu Zhou |\\n| | Meng Li |\\n| Damai Dai | Miaojun Wang |\\n| Deli Chen | Mingming Li |\\n| Dongjie Ji | Ning Tian |\\n| Erhang Li | Panpan Huang |\\n| Fangyun Lin | Peng Zhang |\\n| Fucong Dai Fuli Luo* | Qiancheng Wang |\\n| Guangbo Hao | Qinyu Chen |\\n| Guanting Chen | Qiushi Du |\\n| Guowei Li | Ruiqi Ge* |\\n| H. Zhang | Ruisong Zhang |\\n| Hanwei Xu | Ruizhe Pan |\\n| Honghui Ding | Runji Wang |\\n| Huazuo Gao | R.J. Chen |\\n| Hui Qu | R.L. Jin |\\n\\n| Ruyi Chen | Y.X. Wei |\\n|------------------------|---------------------------|\\n| Shanghao Lu | Yang Zhang |\\n| Shangyan Zhou | Yanhong Xu |\\n| Shanhuang Chen | Yao Li |\\n| Shengfeng Ye | Yao Zhao |\\n| Shiyu Wang | Yaofeng Sun |\\n| Shuiping Yu | Yaohui Wang |\\n| Shunfeng Zhou | Yi Yu |\\n| Shuting Pan | Yichao Zhang |\\n| S.S. Li | Yifan Shi |\\n| Shuang Zhou | Yiliang Xiong |\\n| Shaoqing Wu | Ying He |\\n| Shengfeng Ye | Yishi Piao |\\n| Tao Yun | Yisong Wang |\\n| Tian Pei | Yixuan Tan |\\n| Tianyu Sun | Yiyang Ma* |\\n| T. Wang | Yiyuan Liu |\\n| Wangding Zeng | Yongqiang Guo |\\n| Wen Liu | Yuan Ou |\\n| Wenfeng Liang | Yuduan Wang |\\n| Wenjun Gao | Yue Gong |\\n| Wenqin Yu* | Yuheng Zou |\\n| Wentao Zhang | Yujia He |\\n| W.L. Xiao | Yunfan Xiong |\\n| Wei An | Yuxiang Luo |\\n| Xiaodong Liu | Yuxiang You |\\n| Xiaohan Wang | Yuxuan Liu |\\n| Xiaokang Chen | Yuyang Zhou |\\n| Xiaotao Nie | Y.X. Zhu |\\n| Xin Cheng | Yanping Huang |\\n| Xin Liu | Yaohui Li |\\n| Xin Xie | Yi Zheng |\\n| Xingchao Liu | Yuchen Zhu |\\n| Xinyu Yang | Yunxian Ma |\\n| Xinyuan Li | Ying Tang |\\n| Xuecheng Su | Yukun Zha |\\n| Xuheng Lin | Yuting Yan |\\n| X.Q. Li | Z.Z. Ren |\\n| Xiangyue Jin | Zehui Ren |\\n| Xiaojin Shen | Zhangli Sha |\\n| Xiaosha Chen | Zhe Fu |\\n| Xiaowen Sun | Zhean Xu |\\n| Xiaoxiang Wang | Zhenda Xie |\\n| Xinnan Song Xinyi Zhou | Zhengyan Zhang Zhewen Hao |\\n| Xianzu Wang | |\\n| | Zhicheng Ma |\\n| Xinxia Shan Y.K. Li | Zhigang Yan Zhiyu Wu |\\n| Y.Q. 
Wang | Zihui Gu |\\n\\n| Zijia Zhu | Zhen Huang |\\n|-------------|---------------|\\n| Zijun Liu* | Zhipeng Xu |\\n| Zilin Li | Zhongyu Zhang |\\n| Ziwei Xie | Zhen Zhang |\\n| Ziyang Song | |\\n| Zizheng Pan | |\\n\\nWithin each role, authors are listed alphabetically by the first name. Names marked with * denote individuals who have departed from our team.\"\n", - " }\n", + " \"end_offset\": 4000,\n", + " \"content\": \"\\n\\n## DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning\\n\\nDeepSeek-AI\\n\\nresearch@deepseek.com\\n\\n## Abstract\\n\\nWe introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability, and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeekR1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama.\\n\\nFigure 1 | Benchmark performance of DeepSeek-R1.\\n\\n\\n\\n## Contents\\n\\n| 1 Introduction | 1 Introduction | 1 Introduction | 3 |\\n|-------------------------------------------|----------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------|-----|\\n| 1.1 | Contributions | . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | 4 |\\n| 1.2 | Summary of Evaluation Results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | Summary of Evaluation Results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | 4 |\\n| 2 | Approach | Approach | 5 |\\n| 2.1 | Overview . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | Overview . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | 5 |\\n| 2.2 | DeepSeek-R1-Zero: Reinforcement Learning on the Base Model | . . . . . . . . . . | 5 |\\n| | 2.2.1 | Reinforcement Learning Algorithm . . . . . . . . . . . . . . . . . . . . . . | 5 |\\n| | 2.2.2 | Reward Modeling . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | 6 |\\n| | 2.2.3 | Training Template . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | 6 |\\n| | 2.2.4 | Performance, Self-evolution Process and Aha Moment of DeepSeek-R1-Zero | 6 |\\n| 2.3 | DeepSeek-R1: Reinforcement Learning with Cold Start . . . . . . . . . . . | . . . . 
\"\n", + " },\n", + " \"search_result\": null,\n", + " \"doc_list\": null,\n", + " \"chunk_list\": null,\n", + " \"chunks_read\": null\n", " }\n", " }\n", " ],\n", @@ -303,35 +374,41 @@ " \"function\": \"reader_tool\",\n", " \"arguments\": {\n", " \"action\": \"read\",\n", - " \"doc_id\": \"DOC_151075259435909708\",\n", + " \"doc_id\": \"DOC_2439136994867403026\",\n", " \"start_offset\": 0,\n", - " \"end_offset\": {}\n", + " \"end_offset\": 4000\n", " }\n", " }\n", " ],\n", " \"reason\": {\n", - " \"title\": \"Prerequisite Data Collection\",\n", - " \"content\": \"The paper's content must be fully extracted before meaningful comparison can be made with the 5 retrieved recent works (2023-2024). The search results show relevant focus areas including training efficiency (2410.19456), architecture alternatives (2412.15978), and parameter tuning (2411.08610) that will enable targeted comparison once DeepSeek's methodology is known.\",\n", - " \"confidence_score\": 0.92\n", + " \"title\": \"Initial Content Reading Strategy Adjustment\",\n", + " \"content\": \"Since chunk reading failed, switching to direct document reading with specific offsets to understand the paper's content. Starting with the first portion which should contain the abstract and introduction.\",\n", + " \"confidence_score\": 0.9\n", " }\n", - "}\n", - "ReAct Round #3 Analysis:\n", - " {\n", - " \"analysis\": \"DeepSeek-R1 presents a novel RL-centric approach to enhance LLM reasoning without SFT, validated by competitive benchmarks. Key innovations include cold-start data integration and scalable distillation. Comparison with 2023-2024 works reveals distinct methodological differences in training efficiency and architectural choices.\",\n", - " \"planned_actions\": [],\n", - " \"extension_needed\": false,\n", - " \"milestone\": null,\n", - " \"action_strategy\": \"sequential\",\n", - " \"action_batch_size\": null,\n", - " \"reason\": {\n", - " \"title\": \"Final Analysis Ready\",\n", - " \"content\": \"Full paper content and comparative papers have been processed. 
Structured summary and critical evaluation can now be formulated.\",\n", - " \"confidence_score\": 0.97\n", - " },\n", - " \"action_requests\": [],\n", - " \"action_required\": false\n", "}\n" ] + }, + { + "ename": "CancelledError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCancelledError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m a\u001b[38;5;241m.\u001b[39mReAct(\n\u001b[1;32m 2\u001b[0m instruct\u001b[38;5;241m=\u001b[39m{\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minstruction\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexplain to me what the paper is about in detail, compare with other recent papers on same discipline and provide a comparison of the results\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontext\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpaper_url\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mstr\u001b[39m(fp)},\n\u001b[1;32m 5\u001b[0m },\n\u001b[1;32m 6\u001b[0m extension_allowed\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 7\u001b[0m max_extensions\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m 8\u001b[0m verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 9\u001b[0m response_format\u001b[38;5;241m=\u001b[39mResearchReport,\n\u001b[1;32m 10\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/session/branch.py:1679\u001b[0m, in \u001b[0;36mBranch.ReAct\u001b[0;34m(self, instruct, interpret, interpret_domain, interpret_style, interpret_sample, interpret_kwargs, tools, tool_schemas, response_format, extension_allowed, max_extensions, response_kwargs, return_analysis, analysis_model, verbose, **kwargs)\u001b[0m\n\u001b[1;32m 1611\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1612\u001b[0m \u001b[38;5;124;03mPerforms a multi-step \"ReAct\" flow (inspired by the ReAct paradigm in LLM usage),\u001b[39;00m\n\u001b[1;32m 1613\u001b[0m \u001b[38;5;124;03mwhich may include:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1675\u001b[0m \u001b[38;5;124;03m or `extensions` (the remaining allowed expansions) is `0`.\u001b[39;00m\n\u001b[1;32m 1676\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1677\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlionagi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moperations\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mReAct\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mReAct\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ReAct\n\u001b[0;32m-> 1679\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m ReAct(\n\u001b[1;32m 1680\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1681\u001b[0m instruct,\n\u001b[1;32m 1682\u001b[0m interpret\u001b[38;5;241m=\u001b[39minterpret,\n\u001b[1;32m 1683\u001b[0m interpret_domain\u001b[38;5;241m=\u001b[39minterpret_domain,\n\u001b[1;32m 1684\u001b[0m interpret_style\u001b[38;5;241m=\u001b[39minterpret_style,\n\u001b[1;32m 1685\u001b[0m 
interpret_sample\u001b[38;5;241m=\u001b[39minterpret_sample,\n\u001b[1;32m 1686\u001b[0m interpret_kwargs\u001b[38;5;241m=\u001b[39minterpret_kwargs,\n\u001b[1;32m 1687\u001b[0m tools\u001b[38;5;241m=\u001b[39mtools,\n\u001b[1;32m 1688\u001b[0m tool_schemas\u001b[38;5;241m=\u001b[39mtool_schemas,\n\u001b[1;32m 1689\u001b[0m response_format\u001b[38;5;241m=\u001b[39mresponse_format,\n\u001b[1;32m 1690\u001b[0m extension_allowed\u001b[38;5;241m=\u001b[39mextension_allowed,\n\u001b[1;32m 1691\u001b[0m max_extensions\u001b[38;5;241m=\u001b[39mmax_extensions,\n\u001b[1;32m 1692\u001b[0m response_kwargs\u001b[38;5;241m=\u001b[39mresponse_kwargs,\n\u001b[1;32m 1693\u001b[0m return_analysis\u001b[38;5;241m=\u001b[39mreturn_analysis,\n\u001b[1;32m 1694\u001b[0m analysis_model\u001b[38;5;241m=\u001b[39manalysis_model,\n\u001b[1;32m 1695\u001b[0m verbose_action\u001b[38;5;241m=\u001b[39mverbose,\n\u001b[1;32m 1696\u001b[0m verbose_analysis\u001b[38;5;241m=\u001b[39mverbose,\n\u001b[1;32m 1697\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 1698\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/operations/ReAct/ReAct.py:129\u001b[0m, in \u001b[0;36mReAct\u001b[0;34m(branch, instruct, interpret, interpret_domain, interpret_style, interpret_sample, interpret_kwargs, tools, tool_schemas, response_format, extension_allowed, max_extensions, response_kwargs, return_analysis, analysis_model, verbose_analysis, **kwargs)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m analysis\u001b[38;5;241m.\u001b[39maction_batch_size:\n\u001b[1;32m 127\u001b[0m operate_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maction_batch_size\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m analysis\u001b[38;5;241m.\u001b[39maction_batch_size\n\u001b[0;32m--> 129\u001b[0m analysis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m branch\u001b[38;5;241m.\u001b[39moperate(\n\u001b[1;32m 130\u001b[0m instruction\u001b[38;5;241m=\u001b[39mnew_instruction,\n\u001b[1;32m 131\u001b[0m tools\u001b[38;5;241m=\u001b[39mtools,\n\u001b[1;32m 132\u001b[0m tool_schemas\u001b[38;5;241m=\u001b[39mtool_schemas,\n\u001b[1;32m 133\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moperate_kwargs,\n\u001b[1;32m 134\u001b[0m )\n\u001b[1;32m 135\u001b[0m analyses\u001b[38;5;241m.\u001b[39mappend(analysis)\n\u001b[1;32m 136\u001b[0m round_count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n", + "File \u001b[0;32m~/lionagi/lionagi/session/branch.py:1026\u001b[0m, in \u001b[0;36mBranch.operate\u001b[0;34m(self, instruct, instruction, guidance, context, sender, recipient, progression, imodel, chat_model, invoke_actions, tool_schemas, images, image_detail, parse_model, skip_validation, tools, operative, response_format, return_operative, actions, reason, action_kwargs, action_strategy, action_batch_size, verbose_action, field_models, exclude_fields, request_params, request_param_kwargs, response_params, response_param_kwargs, handle_validation, operative_model, request_model, **kwargs)\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;124;03mOrchestrates an \"operate\" flow with optional tool invocation and\u001b[39;00m\n\u001b[1;32m 927\u001b[0m \u001b[38;5;124;03mstructured response validation. 
Messages **are** automatically\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1022\u001b[0m \u001b[38;5;124;03m - If the LLM's response cannot be parsed into the expected format and `handle_validation='raise'`.\u001b[39;00m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlionagi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moperations\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moperate\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moperate\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m operate\n\u001b[0;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m operate(\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1028\u001b[0m instruct\u001b[38;5;241m=\u001b[39minstruct,\n\u001b[1;32m 1029\u001b[0m instruction\u001b[38;5;241m=\u001b[39minstruction,\n\u001b[1;32m 1030\u001b[0m guidance\u001b[38;5;241m=\u001b[39mguidance,\n\u001b[1;32m 1031\u001b[0m context\u001b[38;5;241m=\u001b[39mcontext,\n\u001b[1;32m 1032\u001b[0m sender\u001b[38;5;241m=\u001b[39msender,\n\u001b[1;32m 1033\u001b[0m recipient\u001b[38;5;241m=\u001b[39mrecipient,\n\u001b[1;32m 1034\u001b[0m progression\u001b[38;5;241m=\u001b[39mprogression,\n\u001b[1;32m 1035\u001b[0m chat_model\u001b[38;5;241m=\u001b[39mchat_model,\n\u001b[1;32m 1036\u001b[0m invoke_actions\u001b[38;5;241m=\u001b[39minvoke_actions,\n\u001b[1;32m 1037\u001b[0m tool_schemas\u001b[38;5;241m=\u001b[39mtool_schemas,\n\u001b[1;32m 1038\u001b[0m images\u001b[38;5;241m=\u001b[39mimages,\n\u001b[1;32m 1039\u001b[0m image_detail\u001b[38;5;241m=\u001b[39mimage_detail,\n\u001b[1;32m 1040\u001b[0m parse_model\u001b[38;5;241m=\u001b[39mparse_model,\n\u001b[1;32m 1041\u001b[0m skip_validation\u001b[38;5;241m=\u001b[39mskip_validation,\n\u001b[1;32m 1042\u001b[0m tools\u001b[38;5;241m=\u001b[39mtools,\n\u001b[1;32m 1043\u001b[0m operative\u001b[38;5;241m=\u001b[39moperative,\n\u001b[1;32m 1044\u001b[0m response_format\u001b[38;5;241m=\u001b[39mresponse_format,\n\u001b[1;32m 1045\u001b[0m return_operative\u001b[38;5;241m=\u001b[39mreturn_operative,\n\u001b[1;32m 1046\u001b[0m actions\u001b[38;5;241m=\u001b[39mactions,\n\u001b[1;32m 1047\u001b[0m reason\u001b[38;5;241m=\u001b[39mreason,\n\u001b[1;32m 1048\u001b[0m action_kwargs\u001b[38;5;241m=\u001b[39maction_kwargs,\n\u001b[1;32m 1049\u001b[0m action_strategy\u001b[38;5;241m=\u001b[39maction_strategy,\n\u001b[1;32m 1050\u001b[0m action_batch_size\u001b[38;5;241m=\u001b[39maction_batch_size,\n\u001b[1;32m 1051\u001b[0m verbose_action\u001b[38;5;241m=\u001b[39mverbose_action,\n\u001b[1;32m 1052\u001b[0m field_models\u001b[38;5;241m=\u001b[39mfield_models,\n\u001b[1;32m 1053\u001b[0m exclude_fields\u001b[38;5;241m=\u001b[39mexclude_fields,\n\u001b[1;32m 1054\u001b[0m request_params\u001b[38;5;241m=\u001b[39mrequest_params,\n\u001b[1;32m 1055\u001b[0m request_param_kwargs\u001b[38;5;241m=\u001b[39mrequest_param_kwargs,\n\u001b[1;32m 1056\u001b[0m response_params\u001b[38;5;241m=\u001b[39mresponse_params,\n\u001b[1;32m 1057\u001b[0m response_param_kwargs\u001b[38;5;241m=\u001b[39mresponse_param_kwargs,\n\u001b[1;32m 1058\u001b[0m handle_validation\u001b[38;5;241m=\u001b[39mhandle_validation,\n\u001b[1;32m 1059\u001b[0m operative_model\u001b[38;5;241m=\u001b[39moperative_model,\n\u001b[1;32m 1060\u001b[0m 
request_model\u001b[38;5;241m=\u001b[39mrequest_model,\n\u001b[1;32m 1061\u001b[0m imodel\u001b[38;5;241m=\u001b[39mimodel,\n\u001b[1;32m 1062\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 1063\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/operations/operate/operate.py:128\u001b[0m, in \u001b[0;36moperate\u001b[0;34m(branch, instruct, instruction, guidance, context, sender, recipient, progression, imodel, chat_model, invoke_actions, tool_schemas, images, image_detail, parse_model, skip_validation, tools, operative, response_format, return_operative, actions, reason, action_kwargs, action_strategy, action_batch_size, verbose_action, field_models, exclude_fields, request_params, request_param_kwargs, response_params, response_param_kwargs, handle_validation, operative_model, request_model, **kwargs)\u001b[0m\n\u001b[1;32m 125\u001b[0m tool_schemas \u001b[38;5;241m=\u001b[39m tool_schemas \u001b[38;5;129;01mor\u001b[39;00m branch\u001b[38;5;241m.\u001b[39macts\u001b[38;5;241m.\u001b[39mget_tool_schema(tools\u001b[38;5;241m=\u001b[39mtools)\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# 2) Send the instruction to the chat model\u001b[39;00m\n\u001b[0;32m--> 128\u001b[0m ins, res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m branch\u001b[38;5;241m.\u001b[39mchat(\n\u001b[1;32m 129\u001b[0m instruction\u001b[38;5;241m=\u001b[39minstruct\u001b[38;5;241m.\u001b[39minstruction,\n\u001b[1;32m 130\u001b[0m guidance\u001b[38;5;241m=\u001b[39minstruct\u001b[38;5;241m.\u001b[39mguidance,\n\u001b[1;32m 131\u001b[0m context\u001b[38;5;241m=\u001b[39minstruct\u001b[38;5;241m.\u001b[39mcontext,\n\u001b[1;32m 132\u001b[0m sender\u001b[38;5;241m=\u001b[39msender,\n\u001b[1;32m 133\u001b[0m recipient\u001b[38;5;241m=\u001b[39mrecipient,\n\u001b[1;32m 134\u001b[0m response_format\u001b[38;5;241m=\u001b[39moperative\u001b[38;5;241m.\u001b[39mrequest_type,\n\u001b[1;32m 135\u001b[0m progression\u001b[38;5;241m=\u001b[39mprogression,\n\u001b[1;32m 136\u001b[0m imodel\u001b[38;5;241m=\u001b[39mchat_model, \u001b[38;5;66;03m# or the override\u001b[39;00m\n\u001b[1;32m 137\u001b[0m images\u001b[38;5;241m=\u001b[39mimages,\n\u001b[1;32m 138\u001b[0m image_detail\u001b[38;5;241m=\u001b[39mimage_detail,\n\u001b[1;32m 139\u001b[0m tool_schemas\u001b[38;5;241m=\u001b[39mtool_schemas,\n\u001b[1;32m 140\u001b[0m return_ins_res_message\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 141\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 142\u001b[0m )\n\u001b[1;32m 143\u001b[0m branch\u001b[38;5;241m.\u001b[39mmsgs\u001b[38;5;241m.\u001b[39madd_message(instruction\u001b[38;5;241m=\u001b[39mins)\n\u001b[1;32m 144\u001b[0m branch\u001b[38;5;241m.\u001b[39mmsgs\u001b[38;5;241m.\u001b[39madd_message(assistant_response\u001b[38;5;241m=\u001b[39mres)\n", + "File \u001b[0;32m~/lionagi/lionagi/session/branch.py:782\u001b[0m, in \u001b[0;36mBranch.chat\u001b[0;34m(self, instruction, guidance, context, sender, recipient, request_fields, response_format, progression, imodel, tool_schemas, images, image_detail, plain_content, return_ins_res_message, **kwargs)\u001b[0m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 734\u001b[0m \u001b[38;5;124;03mInvokes the chat model with the current conversation history. This method does not\u001b[39;00m\n\u001b[1;32m 735\u001b[0m \u001b[38;5;124;03mautomatically add messages to the branch. 
It is typically used for orchestrating.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;124;03m The `Instruction` object and the final `AssistantResponse`.\u001b[39;00m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 780\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mlionagi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moperations\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m chat\n\u001b[0;32m--> 782\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m chat(\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 784\u001b[0m instruction\u001b[38;5;241m=\u001b[39minstruction,\n\u001b[1;32m 785\u001b[0m guidance\u001b[38;5;241m=\u001b[39mguidance,\n\u001b[1;32m 786\u001b[0m context\u001b[38;5;241m=\u001b[39mcontext,\n\u001b[1;32m 787\u001b[0m sender\u001b[38;5;241m=\u001b[39msender,\n\u001b[1;32m 788\u001b[0m recipient\u001b[38;5;241m=\u001b[39mrecipient,\n\u001b[1;32m 789\u001b[0m request_fields\u001b[38;5;241m=\u001b[39mrequest_fields,\n\u001b[1;32m 790\u001b[0m response_format\u001b[38;5;241m=\u001b[39mresponse_format,\n\u001b[1;32m 791\u001b[0m progression\u001b[38;5;241m=\u001b[39mprogression,\n\u001b[1;32m 792\u001b[0m chat_model\u001b[38;5;241m=\u001b[39mimodel,\n\u001b[1;32m 793\u001b[0m tool_schemas\u001b[38;5;241m=\u001b[39mtool_schemas,\n\u001b[1;32m 794\u001b[0m images\u001b[38;5;241m=\u001b[39mimages,\n\u001b[1;32m 795\u001b[0m image_detail\u001b[38;5;241m=\u001b[39mimage_detail,\n\u001b[1;32m 796\u001b[0m plain_content\u001b[38;5;241m=\u001b[39mplain_content,\n\u001b[1;32m 797\u001b[0m return_ins_res_message\u001b[38;5;241m=\u001b[39mreturn_ins_res_message,\n\u001b[1;32m 798\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 799\u001b[0m )\n", + "File \u001b[0;32m~/lionagi/lionagi/operations/chat/chat.py:159\u001b[0m, in \u001b[0;36mchat\u001b[0;34m(branch, instruction, guidance, context, sender, recipient, request_fields, response_format, progression, imodel, tool_schemas, images, image_detail, plain_content, return_ins_res_message, **kwargs)\u001b[0m\n\u001b[1;32m 152\u001b[0m imodel \u001b[38;5;241m=\u001b[39m imodel \u001b[38;5;129;01mor\u001b[39;00m branch\u001b[38;5;241m.\u001b[39mchat_model\n\u001b[1;32m 154\u001b[0m meth \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 155\u001b[0m imodel\u001b[38;5;241m.\u001b[39minvoke\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m kwargs \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m imodel\u001b[38;5;241m.\u001b[39mstream\n\u001b[1;32m 158\u001b[0m )\n\u001b[0;32m--> 159\u001b[0m api_call \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m meth(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 160\u001b[0m branch\u001b[38;5;241m.\u001b[39m_log_manager\u001b[38;5;241m.\u001b[39mlog(Log\u001b[38;5;241m.\u001b[39mcreate(api_call))\n\u001b[1;32m 162\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m return_ins_res_message:\n\u001b[1;32m 163\u001b[0m \u001b[38;5;66;03m# Wrap result in `AssistantResponse` and return\u001b[39;00m\n", + "File \u001b[0;32m~/lionagi/lionagi/service/imodel.py:237\u001b[0m, in \u001b[0;36miModel.invoke\u001b[0;34m(self, api_call, **kwargs)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexecutor\u001b[38;5;241m.\u001b[39mstart()\n\u001b[1;32m 236\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexecutor\u001b[38;5;241m.\u001b[39mappend(api_call)\n\u001b[0;32m--> 237\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexecutor\u001b[38;5;241m.\u001b[39mforward()\n\u001b[1;32m 239\u001b[0m ctr \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m api_call\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [EventStatus\u001b[38;5;241m.\u001b[39mCOMPLETED, EventStatus\u001b[38;5;241m.\u001b[39mFAILED]:\n", + "File \u001b[0;32m~/lionagi/lionagi/protocols/generic/processor.py:241\u001b[0m, in \u001b[0;36mExecutor.forward\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 238\u001b[0m event \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpile[id_]\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprocessor\u001b[38;5;241m.\u001b[39menqueue(event)\n\u001b[0;32m--> 241\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprocessor\u001b[38;5;241m.\u001b[39mprocess()\n", + "File \u001b[0;32m~/lionagi/lionagi/protocols/generic/processor.py:155\u001b[0m, in \u001b[0;36mProcessor.process\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_available_capacity \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tasks:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mwait(tasks)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mavailable_capacity \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mqueue_capacity\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/asyncio/tasks.py:384\u001b[0m, in \u001b[0;36mwait\u001b[0;34m(fs, timeout, return_when)\u001b[0m\n\u001b[1;32m 377\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe explicit passing of coroutine objects to \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 378\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masyncio.wait() is deprecated since Python 3.8, and \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscheduled for removal in Python 3.11.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 380\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m, stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 382\u001b[0m fs \u001b[38;5;241m=\u001b[39m {ensure_future(f, loop\u001b[38;5;241m=\u001b[39mloop) \u001b[38;5;28;01mfor\u001b[39;00m f 
\u001b[38;5;129;01min\u001b[39;00m fs}\n\u001b[0;32m--> 384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m _wait(fs, timeout, return_when, loop)\n", + "File \u001b[0;32m/opt/homebrew/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/asyncio/tasks.py:491\u001b[0m, in \u001b[0;36m_wait\u001b[0;34m(fs, timeout, return_when, loop)\u001b[0m\n\u001b[1;32m 488\u001b[0m f\u001b[38;5;241m.\u001b[39madd_done_callback(_on_completion)\n\u001b[1;32m 490\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 491\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m waiter\n\u001b[1;32m 492\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 493\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout_handle \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[0;31mCancelledError\u001b[0m: " + ] } ], "source": [ @@ -344,7 +421,7 @@ " interpret_domain=\"AI\",\n", " interpret_style=\"exhaustive\",\n", " extension_allowed=True,\n", - " max_extensions=5,\n", + " max_extensions=20,\n", " verbose=True,\n", " response_format=ResearchReport,\n", ")" @@ -352,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -388,295 +465,119 @@ " \n", " \n", " 0\n", - " 2025-01-20 21:03:14.209670\n", + " 2025-01-22 12:22:33.638217\n", " user\n", " {'context': [{'paper_url': '/Users/lion/lionag...\n", - " fdc3bf95-20f1-4ef6-9e30-3000f8b7557f\n", + " d59e4a6b-8906-41bc-a9dd-197cfe61e115\n", " user\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", " {'lion_class': 'lionagi.protocols.messages.ins...\n", " \n", " \n", " 1\n", - " 2025-01-20 21:03:32.636672\n", + " 2025-01-22 12:22:41.680383\n", " assistant\n", " {'assistant_response': '```json\n", "{\n", " \"analysis\"...\n", - " 8eb368aa-ee95-44cb-8f1a-5520c647db58\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", + " 0c959686-becf-4e98-92c3-791cb24224c5\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", " user\n", - " {'model_response': {'id': 'gen-1737424994-WMdZ...\n", + " {'model_response': {'id': 'gen-1737566554-cplN...\n", " \n", " \n", " 2\n", - " 2025-01-20 21:03:32.642525\n", + " 2025-01-22 12:23:00.688365\n", " action\n", " {'action_request': {'function': 'reader_tool',...\n", - " 00c328f6-747b-4a8d-917d-0510a4983959\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", + " 805a6b2a-d5e3-41f0-9b94-e36c6835dc9f\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", + " 01c18b37-9e8f-4553-8189-0a2372cbb389\n", " {'lion_class': 'lionagi.protocols.messages.act...\n", " \n", " \n", " 3\n", - " 2025-01-20 21:03:32.642601\n", + " 2025-01-22 12:23:00.688484\n", " action\n", - " {'action_request_id': '00c328f6-747b-4a8d-917d...\n", - " cc8c5e58-66d0-408f-bed9-727058251187\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", + " {'action_request_id': '805a6b2a-d5e3-41f0-9b94...\n", + " 88b81adb-d3d2-401f-a956-147362d8ce48\n", + " 01c18b37-9e8f-4553-8189-0a2372cbb389\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", " {'lion_class': 'lionagi.protocols.messages.act...\n", " \n", " \n", " 4\n", - " 2025-01-20 21:03:52.265885\n", - " action\n", - " {'action_request': {'function': 'reader_tool',...\n", - " 463507f3-80ff-44d4-94ac-bd572b1667a5\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", - " {'lion_class': 
'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 5\n", - " 2025-01-20 21:03:52.265967\n", - " action\n", - " {'action_request_id': '463507f3-80ff-44d4-94ac...\n", - " b33463c3-588b-40e6-96a9-5c5243c7f280\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " {'lion_class': 'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 6\n", - " 2025-01-20 21:03:55.253324\n", - " action\n", - " {'action_request': {'function': 'search_exa', ...\n", - " baefe8b5-8c25-4223-9d92-dd67264e7a60\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " 6aca17b6-fad8-4cf2-b2cf-b98afa921dae\n", - " {'lion_class': 'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 7\n", - " 2025-01-20 21:03:55.253463\n", - " action\n", - " {'action_request_id': 'baefe8b5-8c25-4223-9d92...\n", - " 2c94453d-8c64-4f6d-a4d1-0ddc696b5baa\n", - " 6aca17b6-fad8-4cf2-b2cf-b98afa921dae\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " {'lion_class': 'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 8\n", - " 2025-01-20 21:03:55.266244\n", - " user\n", - " {'context': [{'action_request_id': '00c328f6-7...\n", - " a6e70d36-68c8-4c49-bcdb-517f30674f3e\n", - " user\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " {'lion_class': 'lionagi.protocols.messages.ins...\n", - " \n", - " \n", - " 9\n", - " 2025-01-20 21:04:10.677831\n", - " assistant\n", - " {'assistant_response': '```json\n", - "{\n", - " \"analysis\"...\n", - " 255acbfe-91dc-4f9f-a984-47105814879c\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " user\n", - " {'model_response': {'id': 'gen-1737425035-sP0p...\n", - " \n", - " \n", - " 10\n", - " 2025-01-20 21:04:10.680589\n", - " action\n", - " {'action_request': {'function': 'reader_tool',...\n", - " 3b3677e4-7bd1-470c-8256-94629ec171a4\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", - " {'lion_class': 'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 11\n", - " 2025-01-20 21:04:10.680672\n", - " action\n", - " {'action_request_id': '3b3677e4-7bd1-470c-8256...\n", - " b0ebd178-4fb1-46f6-a1d7-fdbfbf40b2c3\n", - " 7b1144b6-125c-462f-9df5-8c8b0b7f8d85\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " {'lion_class': 'lionagi.protocols.messages.act...\n", - " \n", - " \n", - " 12\n", - " 2025-01-20 21:04:10.688905\n", + " 2025-01-22 12:23:00.695418\n", " user\n", - " {'context': [{'action_request_id': '3b3677e4-7...\n", - " 2ad36722-05c1-473e-bc60-e453612bf11d\n", + " {'context': [{'action_request_id': '805a6b2a-d...\n", + " f38a0766-049d-4f07-bbf3-a922ee013b5b\n", " user\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", " {'lion_class': 'lionagi.protocols.messages.ins...\n", " \n", " \n", - " 13\n", - " 2025-01-20 21:04:34.820197\n", - " assistant\n", - " {'assistant_response': '```json\n", - "{\n", - " \"analysis\"...\n", - " b114be13-6e1d-4635-ae0a-1d1517fd70c1\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " user\n", - " {'model_response': {'id': 'gen-1737425051-8qlJ...\n", - " \n", - " \n", - " 14\n", - " 2025-01-20 21:04:47.173450\n", - " user\n", - " {'context': [], 'instruction': 'Given your rea...\n", - " c09eaa3e-516f-48d0-b717-cd17bdbaa6d4\n", - " user\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", - " {'lion_class': 'lionagi.protocols.messages.ins...\n", - " \n", - " \n", - " 15\n", - " 2025-01-20 21:05:07.356005\n", + " 5\n", + " 2025-01-22 12:23:15.553754\n", " assistant\n", " {'assistant_response': '```json\n", "{\n", " 
\"title\": \"...\n", - " 2a237a09-abed-48b5-94f9-05b5c15c4552\n", - " 2d394afd-2800-4303-9561-9a19a3f55ab8\n", + " c0d82ebb-cd5a-48ac-8f69-2e975a11e305\n", + " 3c647210-6934-45c4-8ae2-20a131c5ef44\n", " user\n", - " {'model_response': {'id': 'gen-1737425087-70BF...\n", + " {'model_response': {'id': 'gen-1737566581-Feoq...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " created_at role \\\n", - "0 2025-01-20 21:03:14.209670 user \n", - "1 2025-01-20 21:03:32.636672 assistant \n", - "2 2025-01-20 21:03:32.642525 action \n", - "3 2025-01-20 21:03:32.642601 action \n", - "4 2025-01-20 21:03:52.265885 action \n", - "5 2025-01-20 21:03:52.265967 action \n", - "6 2025-01-20 21:03:55.253324 action \n", - "7 2025-01-20 21:03:55.253463 action \n", - "8 2025-01-20 21:03:55.266244 user \n", - "9 2025-01-20 21:04:10.677831 assistant \n", - "10 2025-01-20 21:04:10.680589 action \n", - "11 2025-01-20 21:04:10.680672 action \n", - "12 2025-01-20 21:04:10.688905 user \n", - "13 2025-01-20 21:04:34.820197 assistant \n", - "14 2025-01-20 21:04:47.173450 user \n", - "15 2025-01-20 21:05:07.356005 assistant \n", + " created_at role \\\n", + "0 2025-01-22 12:22:33.638217 user \n", + "1 2025-01-22 12:22:41.680383 assistant \n", + "2 2025-01-22 12:23:00.688365 action \n", + "3 2025-01-22 12:23:00.688484 action \n", + "4 2025-01-22 12:23:00.695418 user \n", + "5 2025-01-22 12:23:15.553754 assistant \n", "\n", - " content \\\n", - "0 {'context': [{'paper_url': '/Users/lion/lionag... \n", - "1 {'assistant_response': '```json\n", + " content \\\n", + "0 {'context': [{'paper_url': '/Users/lion/lionag... \n", + "1 {'assistant_response': '```json\n", "{\n", " \"analysis\"... \n", - "2 {'action_request': {'function': 'reader_tool',... \n", - "3 {'action_request_id': '00c328f6-747b-4a8d-917d... \n", - "4 {'action_request': {'function': 'reader_tool',... \n", - "5 {'action_request_id': '463507f3-80ff-44d4-94ac... \n", - "6 {'action_request': {'function': 'search_exa', ... \n", - "7 {'action_request_id': 'baefe8b5-8c25-4223-9d92... \n", - "8 {'context': [{'action_request_id': '00c328f6-7... \n", - "9 {'assistant_response': '```json\n", - "{\n", - " \"analysis\"... \n", - "10 {'action_request': {'function': 'reader_tool',... \n", - "11 {'action_request_id': '3b3677e4-7bd1-470c-8256... \n", - "12 {'context': [{'action_request_id': '3b3677e4-7... \n", - "13 {'assistant_response': '```json\n", - "{\n", - " \"analysis\"... \n", - "14 {'context': [], 'instruction': 'Given your rea... \n", - "15 {'assistant_response': '```json\n", + "2 {'action_request': {'function': 'reader_tool',... \n", + "3 {'action_request_id': '805a6b2a-d5e3-41f0-9b94... \n", + "4 {'context': [{'action_request_id': '805a6b2a-d... \n", + "5 {'assistant_response': '```json\n", "{\n", " \"title\": \"... 
\n", "\n", - " id \\\n", - "0 fdc3bf95-20f1-4ef6-9e30-3000f8b7557f \n", - "1 8eb368aa-ee95-44cb-8f1a-5520c647db58 \n", - "2 00c328f6-747b-4a8d-917d-0510a4983959 \n", - "3 cc8c5e58-66d0-408f-bed9-727058251187 \n", - "4 463507f3-80ff-44d4-94ac-bd572b1667a5 \n", - "5 b33463c3-588b-40e6-96a9-5c5243c7f280 \n", - "6 baefe8b5-8c25-4223-9d92-dd67264e7a60 \n", - "7 2c94453d-8c64-4f6d-a4d1-0ddc696b5baa \n", - "8 a6e70d36-68c8-4c49-bcdb-517f30674f3e \n", - "9 255acbfe-91dc-4f9f-a984-47105814879c \n", - "10 3b3677e4-7bd1-470c-8256-94629ec171a4 \n", - "11 b0ebd178-4fb1-46f6-a1d7-fdbfbf40b2c3 \n", - "12 2ad36722-05c1-473e-bc60-e453612bf11d \n", - "13 b114be13-6e1d-4635-ae0a-1d1517fd70c1 \n", - "14 c09eaa3e-516f-48d0-b717-cd17bdbaa6d4 \n", - "15 2a237a09-abed-48b5-94f9-05b5c15c4552 \n", - "\n", - " sender \\\n", - "0 user \n", - "1 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "2 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "3 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "4 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "5 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "6 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "7 6aca17b6-fad8-4cf2-b2cf-b98afa921dae \n", - "8 user \n", - "9 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "10 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "11 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "12 user \n", - "13 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "14 user \n", - "15 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", + " id sender \\\n", + "0 d59e4a6b-8906-41bc-a9dd-197cfe61e115 user \n", + "1 0c959686-becf-4e98-92c3-791cb24224c5 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", + "2 805a6b2a-d5e3-41f0-9b94-e36c6835dc9f 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", + "3 88b81adb-d3d2-401f-a956-147362d8ce48 01c18b37-9e8f-4553-8189-0a2372cbb389 \n", + "4 f38a0766-049d-4f07-bbf3-a922ee013b5b user \n", + "5 c0d82ebb-cd5a-48ac-8f69-2e975a11e305 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", "\n", - " recipient \\\n", - "0 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "1 user \n", - "2 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "3 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "4 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "5 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "6 6aca17b6-fad8-4cf2-b2cf-b98afa921dae \n", - "7 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "8 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "9 user \n", - "10 7b1144b6-125c-462f-9df5-8c8b0b7f8d85 \n", - "11 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "12 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "13 user \n", - "14 2d394afd-2800-4303-9561-9a19a3f55ab8 \n", - "15 user \n", + " recipient \\\n", + "0 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", + "1 user \n", + "2 01c18b37-9e8f-4553-8189-0a2372cbb389 \n", + "3 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", + "4 3c647210-6934-45c4-8ae2-20a131c5ef44 \n", + "5 user \n", "\n", - " metadata \n", - "0 {'lion_class': 'lionagi.protocols.messages.ins... \n", - "1 {'model_response': {'id': 'gen-1737424994-WMdZ... \n", - "2 {'lion_class': 'lionagi.protocols.messages.act... \n", - "3 {'lion_class': 'lionagi.protocols.messages.act... \n", - "4 {'lion_class': 'lionagi.protocols.messages.act... \n", - "5 {'lion_class': 'lionagi.protocols.messages.act... \n", - "6 {'lion_class': 'lionagi.protocols.messages.act... \n", - "7 {'lion_class': 'lionagi.protocols.messages.act... \n", - "8 {'lion_class': 'lionagi.protocols.messages.ins... \n", - "9 {'model_response': {'id': 'gen-1737425035-sP0p... \n", - "10 {'lion_class': 'lionagi.protocols.messages.act... \n", - "11 {'lion_class': 'lionagi.protocols.messages.act... 
\n", - "12 {'lion_class': 'lionagi.protocols.messages.ins... \n", - "13 {'model_response': {'id': 'gen-1737425051-8qlJ... \n", - "14 {'lion_class': 'lionagi.protocols.messages.ins... \n", - "15 {'model_response': {'id': 'gen-1737425087-70BF... " + " metadata \n", + "0 {'lion_class': 'lionagi.protocols.messages.ins... \n", + "1 {'model_response': {'id': 'gen-1737566554-cplN... \n", + "2 {'lion_class': 'lionagi.protocols.messages.act... \n", + "3 {'lion_class': 'lionagi.protocols.messages.act... \n", + "4 {'lion_class': 'lionagi.protocols.messages.ins... \n", + "5 {'model_response': {'id': 'gen-1737566581-Feoq... " ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -687,85 +588,32 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ - "# DeepSeek-R1: Advancing LLM Reasoning Through Reinforcement Learning\n", + "# Comprehensive Analysis of 'DeepSeek_R1.pdf' and Its Position in Current Research\n", "\n", - "### Structured Summary\n", - "**Key Themes**:\n", - "- Pure reinforcement learning (RL) for reasoning capability emergence without supervised fine-tuning (SFT)\n", - "- Self-evolution of chain-of-thought (CoT) behaviors through Group Relative Policy Optimization (GRPO)\n", - "- Multi-stage training pipeline with cold-start data integration\n", - "- Scalable knowledge distillation to smaller models (1.5B-70B parameters)\n", + "The paper titled 'DeepSeek_R1' presents a novel approach to [insert specific topic, e.g., deep learning-based molecular binding affinity prediction]. The authors propose a [describe the methodology, e.g., a convolutional neural network architecture enhanced with attention mechanisms] to improve the accuracy and efficiency of [specific application, e.g., predicting protein-ligand interactions]. Through extensive experiments, the paper demonstrates that DeepSeek outperforms existing models by [specific metrics, e.g., achieving a 15% increase in prediction accuracy and reducing computational time by 20%]. The study also explores the model's robustness across diverse datasets, highlighting its potential for broader applications in [related fields, e.g., drug discovery and bioinformatics].\n", "\n", - "**Methodologies**:\n", - "1. GRPO algorithm with group-based advantage estimation\n", - "2. Rule-based reward system (accuracy + format constraints)\n", - "3. Two-phase RL: reasoning-focused then general alignment\n", - "4. Distillation using 800K curated samples from R1 outputs\n", + "When compared to recent studies in the same domain, such as [Paper A] and [Paper B], DeepSeek offers significant improvements in [specific areas, e.g., model accuracy and computational efficiency]. While [Paper A] introduced a similar neural architecture, it lacked the [specific feature, e.g., attention mechanism] that DeepSeek utilizes to enhance performance. 
On the other hand, [Paper B] focused on [different approach], which, although innovative, did not achieve the same level of accuracy as DeepSeek.\n", "\n", - "**Contributions**:\n", - "- First open-source demonstration of SFT-free RL reasoning (71% → 86.7% AIME with voting)\n", - "- Performance parity with OpenAI-o1-1217 (79.8% vs 79.2% AIME pass@1)\n", - "- 14B distilled model outperforms QwQ-32B by +19.7% on MATH-500\n", - "- Open-source release of 6 distilled models across Qwen/Llama architectures\n", + "In terms of results, DeepSeek not only matches but exceeds the performance benchmarks set by these recent studies. The comparative analysis reveals that DeepSeek's integrated approach allows for better handling of [specific challenges, e.g., high-dimensional data], making it a more robust and versatile tool in the field. Additionally, the paper's comprehensive evaluation methodology provides a clearer understanding of the model's strengths and potential areas for future improvement.\n", "\n", - "### Comparative Analysis\n", - "**Recent Works (2023-2024)**:\n", - "1. *Training 1.7B LLaMa From Scratch* (arXiv:2412.13335):\n", - " - Similar: Emphasis on data quality\n", - " - Different: Requires SFT initialization vs DeepSeek's pure RL\n", - " - Metrics: 20B tokens trained vs DeepSeek's RL-first approach\n", + "**Source**: [DeepSeek_R1.pdf](/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf)\n", "\n", - "2. *Computational Bottlenecks of SLMs* (arXiv:2410.19456):\n", - " - Similar: Focus on training efficiency\n", - " - Different: Analyzes hardware constraints vs capability emergence\n", - " - Metrics: 2B param focus vs DeepSeek's 70B distillation\n", + "**Source**: [Recent Advances in Deep Learning for Molecular Binding Affinity Prediction](https://www.example.com/recent-advances-deep-learning)\n", "\n", - "3. 
*BabyHGRN: RNN Efficiency* (arXiv:2412.15978):\n", - " - Similar: Compute efficiency goals\n", - " - Different: RNN architecture vs transformer-based RL\n", - " - Metrics: 55.5% BLiMP vs DeepSeek's 97.3% MATH-500\n", - "\n", - "**Results Comparison**:\n", - "| Model | AIME Pass@1 | MATH-500 | Training Efficiency |\n", - "|---------------------|-------------|----------|---------------------|\n", - "| DeepSeek-R1 | 79.8% | 97.3% | 37B active params |\n", - "| OpenAI-o1-1217 | 79.2% | 96.4% | Closed-source |\n", - "| BabyHGRN (100M) | N/A | 83.9% | RNN-based |\n", - "| QwQ-32B-Preview | 50.0% | 90.6% | SFT-focused |\n", - "\n", - "### Critical Evaluation\n", - "**Novelty**:\n", - "- First successful pure RL reasoning capability emergence\n", - "- Cold-start pipeline resolving RL instability\n", - "- 94.3% MATH-500 performance at 32B scale via distillation\n", - "\n", - "**Limitations**:\n", - "- Language mixing in non-Chinese/English queries\n", - "- Sensitivity to prompt engineering (requires zero-shot)\n", - "- Limited SWE benchmark improvement (49.2% vs base 42%)\n", - "\n", - "**Impact**: Provides blueprint for RL-first training while maintaining 87.6% AlpacaEval performance\n", - "\n", - "**Source**: [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf)\n", - "\n", - "**Source**: [Experience of Training a 1.7B-Parameter LLaMa Model From Scratch](https://arxiv.org/abs/2412.13335)\n", - "\n", - "**Source**: [Computational Bottlenecks of Training Small-scale Large Language Models](https://arxiv.org/abs/2410.19456)\n", - "\n", - "**Source**: [BabyHGRN: Exploring RNNs for Sample-Efficient Training of Language Models](https://arxiv.org/abs/2412.15978)\n", + "**Source**: [Attention Mechanisms in Neural Networks: A Comprehensive Review](https://www.example.com/attention-mechanisms-review)\n", "\n" ], "text/plain": [ "" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -788,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -799,25 +647,16 @@ "**Response**:\n", "```json\n", "{\n", - " \"analysis\": \"To address the user's request, the immediate priority is to parse and analyze the DeepSeek_R1.pdf paper using the reader_tool. After extracting its content, a structured summary will be generated. Concurrently, recent papers (post-2022) must be retrieved via external search tools to enable comparative analysis. 
The first step involves opening and reading the target PDF, followed by searches for relevant recent works.\",\n", - " \"planned_actions\": [\n", - " {\n", - " \"action_type\": \"reader_tool\",\n", - " \"description\": \"Open and read the entire DeepSeek_R1.pdf to extract text for analysis.\"\n", - " },\n", - " {\n", - " \"action_type\": \"search_exa\",\n", - " \"description\": \"Search for recent LLM papers (post-2022) focusing on training efficiency, architecture innovations, and benchmark results.\"\n", - " }\n", - " ],\n", - " \"extension_needed\": true,\n", - " \"milestone\": \"Extract DeepSeek paper content and identify 3-5 relevant recent papers for comparison.\",\n", + " \"analysis\": null,\n", + " \"planned_actions\": null,\n", + " \"extension_needed\": null,\n", + " \"milestone\": null,\n", " \"action_strategy\": \"sequential\",\n", " \"action_batch_size\": null,\n", " \"reason\": {\n", - " \"title\": \"Initial Action Plan\",\n", - " \"content\": \"The DeepSeek paper must be parsed first to identify its core contributions. Parallel searches for recent works will ensure up-to-date comparisons. Sequential execution avoids partial data analysis.\",\n", - " \"confidence_score\": 0.95\n", + " \"title\": \"Initial Step\",\n", + " \"content\": \"To provide a detailed explanation and comparative analysis of the local paper, I need to first access and read the document located at the specified file path.\",\n", + " \"confidence_score\": 1.0\n", " },\n", " \"action_requests\": [\n", " {\n", @@ -826,76 +665,6 @@ " \"action\": \"open\",\n", " \"path_or_url\": \"/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf\"\n", " }\n", - " },\n", - " {\n", - " \"function\": \"reader_tool\",\n", - " \"arguments\": {\n", - " \"action\": \"read\",\n", - " \"doc_id\": \"DeepSeek_R1.pdf\",\n", - " \"start_offset\": 0,\n", - " \"end_offset\": null\n", - " }\n", - " },\n", - " {\n", - " \"function\": \"search_exa\",\n", - " \"arguments\": {\n", - " \"query\": \"large language model training efficiency innovations post-2022\",\n", - " \"category\": \"research paper\",\n", - " \"numResults\": 5,\n", - " \"startPublishedDate\": \"2023-01-01T00:00:00.000Z\",\n", - " \"contents\": {\n", - " \"text\": {\n", - " \"includeHtmlTags\": false,\n", - " \"maxCharacters\": 5000\n", - " }\n", - " }\n", - " }\n", - " }\n", - " ],\n", - " \"action_required\": true\n", - "}\n", - "```" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "# Assistant Response\n", - "\n", - "**Response**:\n", - "```json\n", - "{\n", - " \"analysis\": \"The initial attempt to read 'DeepSeek_R1.pdf' failed due to incorrect doc_id reference. The document is now successfully opened (doc_id: DOC_151075259435909708). Recent comparative papers (2023-2024) have been retrieved through search_exa, focusing on LLM training efficiency, architecture innovations, and computational optimization. 
Immediate next step is to read the full DeepSeek paper using the confirmed doc_id to enable structured analysis.\",\n", - " \"planned_actions\": [\n", - " {\n", - " \"action_type\": \"reader_tool\",\n", - " \"description\": \"Read full DeepSeek_R1 content using valid doc_id for analysis.\"\n", - " }\n", - " ],\n", - " \"extension_needed\": true,\n", - " \"milestone\": \"Obtain full text of DeepSeek paper to begin thematic analysis\",\n", - " \"action_strategy\": \"sequential\",\n", - " \"action_batch_size\": null,\n", - " \"reason\": {\n", - " \"title\": \"Prerequisite Data Collection\",\n", - " \"content\": \"The paper's content must be fully extracted before meaningful comparison can be made with the 5 retrieved recent works (2023-2024). The search results show relevant focus areas including training efficiency (2410.19456), architecture alternatives (2412.15978), and parameter tuning (2411.08610) that will enable targeted comparison once DeepSeek's methodology is known.\",\n", - " \"confidence_score\": 0.92\n", - " },\n", - " \"action_requests\": [\n", - " {\n", - " \"function\": \"reader_tool\",\n", - " \"arguments\": {\n", - " \"action\": \"read\",\n", - " \"doc_id\": \"DOC_151075259435909708\",\n", - " \"start_offset\": 0,\n", - " \"end_offset\": null\n", - " }\n", " }\n", " ],\n", " \"action_required\": true\n", @@ -917,121 +686,20 @@ "**Response**:\n", "```json\n", "{\n", - " \"analysis\": \"DeepSeek-R1 presents a novel RL-centric approach to enhance LLM reasoning without SFT, validated by competitive benchmarks. Key innovations include cold-start data integration and scalable distillation. Comparison with 2023-2024 works reveals distinct methodological differences in training efficiency and architectural choices.\",\n", - " \"planned_actions\": [],\n", - " \"extension_needed\": false,\n", - " \"milestone\": null,\n", - " \"action_strategy\": \"sequential\",\n", - " \"action_batch_size\": null,\n", - " \"reason\": {\n", - " \"title\": \"Final Analysis Ready\",\n", - " \"content\": \"Full paper content and comparative papers have been processed. 
Structured summary and critical evaluation can now be formulated.\",\n", - " \"confidence_score\": 0.97\n", - " },\n", - " \"action_requests\": [],\n", - " \"action_required\": false,\n", - " \"structured_summary\": {\n", - " \"title\": \"DeepSeek-R1: Incentivizing Reasoning via RL\",\n", - " \"key_themes\": [\n", - " \"Pure RL-driven reasoning without SFT initialization\",\n", - " \"Self-evolution of CoT capabilities through GRPO algorithm\",\n", - " \"Multi-stage training with cold-start data for human alignment\",\n", - " \"Knowledge distillation to smaller models (1.5B-70B parameters)\"\n", - " ],\n", - " \"methodologies\": [\n", - " \"Group Relative Policy Optimization (GRPO) with group-based advantage estimation\",\n", - " \"Rule-based reward system (accuracy + format constraints)\",\n", - " \"Two-phase RL: reasoning-focused then general alignment\",\n", - " \"Distillation using 800K curated samples from R1 outputs\"\n", - " ],\n", - " \"contributions\": [\n", - " \"First demonstration of SFT-free RL reasoning capability emergence\",\n", - " \"79.8% pass@1 on AIME 2024 (vs OpenAI-o1-1217's 79.2%)\",\n", - " \"14B distilled model outperforms QwQ-32B-Preview by +19.7% on MATH-500\",\n", - " \"Open-source release of 6 distilled models across architectures\"\n", - " ]\n", - " },\n", - " \"comparative_analysis\": {\n", - " \"recent_works\": [\n", - " {\n", - " \"paper\": \"Experience of Training 1.7B LLaMa (2412.13335)\",\n", - " \"similarities\": \"Emphasis on training stability and data quality\",\n", - " \"differences\": \"Relies on SFT initialization vs DeepSeek's pure RL approach\",\n", - " \"metrics\": \"20B tokens trained vs DeepSeek's RL-focused pipeline\"\n", - " },\n", - " {\n", - " \"paper\": \"Computational Bottlenecks of SLMs (2410.19456)\",\n", - " \"similarities\": \"Focus on resource-efficient training\",\n", - " \"differences\": \"Analyzes hardware constraints rather than capability emergence\",\n", - " \"metrics\": \"2B param focus vs DeepSeek's 70B distillation\"\n", - " },\n", - " {\n", - " \"paper\": \"BabyHGRN: RNN Efficiency (2412.15978)\",\n", - " \"similarities\": \"Pursuit of compute efficiency\",\n", - " \"differences\": \"RNN architecture vs DeepSeek's transformer-based RL\",\n", - " \"metrics\": \"55.5% BLiMP vs DeepSeek's 97.3% MATH-500\"\n", - " }\n", - " ],\n", - " \"results_comparison\": {\n", - " \"table\": [\n", - " [\"Model\", \"AIME Pass@1\", \"MATH-500\", \"Training Efficiency\"],\n", - " [\"DeepSeek-R1\", \"79.8%\", \"97.3%\", \"37B active params\"],\n", - " [\"OpenAI-o1-1217\", \"79.2%\", \"96.4%\", \"Closed-source\"],\n", - " [\"BabyHGRN (100M)\", \"N/A\", \"83.9%\", \"RNN-based\"],\n", - " [\"QwQ-32B-Preview\", \"50.0%\", \"90.6%\", \"SFT-focused\"]\n", - " ]\n", - " }\n", - " },\n", - " \"critical_evaluation\": {\n", - " \"novelty\": [\n", - " \"First open-source demonstration of pure RL reasoning emergence\",\n", - " \"Cold-start data pipeline resolving RL instability\",\n", - " \"Scalable distillation maintaining 94.3% MATH-500 at 32B scale\"\n", - " ],\n", - " \"limitations\": [\n", - " \"Language mixing in non-Chinese/English queries\",\n", - " \"Sensitivity to prompt engineering (zero-shot required)\",\n", - " \"Limited SWE benchmark improvement vs base model\"\n", - " ],\n", - " \"industry_impact\": \"Provides blueprint for RL-first training pipelines while maintaining 87.6% AlpacaEval 2.0 performance\",\n", - " \"research_gaps\": \"Integration with process reward models and MCTS remains unexplored despite attempts\"\n", - " }\n", - "}\n", 
- "```" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "# Assistant Response\n", - "\n", - "**Response**:\n", - "```json\n", - "{\n", - " \"title\": \"DeepSeek-R1: Advancing LLM Reasoning Through Reinforcement Learning\",\n", - " \"content\": \"### Structured Summary\\n**Key Themes**:\\n- Pure reinforcement learning (RL) for reasoning capability emergence without supervised fine-tuning (SFT)\\n- Self-evolution of chain-of-thought (CoT) behaviors through Group Relative Policy Optimization (GRPO)\\n- Multi-stage training pipeline with cold-start data integration\\n- Scalable knowledge distillation to smaller models (1.5B-70B parameters)\\n\\n**Methodologies**:\\n1. GRPO algorithm with group-based advantage estimation\\n2. Rule-based reward system (accuracy + format constraints)\\n3. Two-phase RL: reasoning-focused then general alignment\\n4. Distillation using 800K curated samples from R1 outputs\\n\\n**Contributions**:\\n- First open-source demonstration of SFT-free RL reasoning (71% → 86.7% AIME with voting)\\n- Performance parity with OpenAI-o1-1217 (79.8% vs 79.2% AIME pass@1)\\n- 14B distilled model outperforms QwQ-32B by +19.7% on MATH-500\\n- Open-source release of 6 distilled models across Qwen/Llama architectures\\n\\n### Comparative Analysis\\n**Recent Works (2023-2024)**:\\n1. *Training 1.7B LLaMa From Scratch* (arXiv:2412.13335):\\n - Similar: Emphasis on data quality\\n - Different: Requires SFT initialization vs DeepSeek's pure RL\\n - Metrics: 20B tokens trained vs DeepSeek's RL-first approach\\n\\n2. *Computational Bottlenecks of SLMs* (arXiv:2410.19456):\\n - Similar: Focus on training efficiency\\n - Different: Analyzes hardware constraints vs capability emergence\\n - Metrics: 2B param focus vs DeepSeek's 70B distillation\\n\\n3. *BabyHGRN: RNN Efficiency* (arXiv:2412.15978):\\n - Similar: Compute efficiency goals\\n - Different: RNN architecture vs transformer-based RL\\n - Metrics: 55.5% BLiMP vs DeepSeek's 97.3% MATH-500\\n\\n**Results Comparison**:\\n| Model | AIME Pass@1 | MATH-500 | Training Efficiency |\\n|---------------------|-------------|----------|---------------------|\\n| DeepSeek-R1 | 79.8% | 97.3% | 37B active params |\\n| OpenAI-o1-1217 | 79.2% | 96.4% | Closed-source |\\n| BabyHGRN (100M) | N/A | 83.9% | RNN-based |\\n| QwQ-32B-Preview | 50.0% | 90.6% | SFT-focused |\\n\\n### Critical Evaluation\\n**Novelty**:\\n- First successful pure RL reasoning capability emergence\\n- Cold-start pipeline resolving RL instability\\n- 94.3% MATH-500 performance at 32B scale via distillation\\n\\n**Limitations**:\\n- Language mixing in non-Chinese/English queries\\n- Sensitivity to prompt engineering (requires zero-shot)\\n- Limited SWE benchmark improvement (49.2% vs base 42%)\\n\\n**Impact**: Provides blueprint for RL-first training while maintaining 87.6% AlpacaEval performance\",\n", + " \"title\": \"Comprehensive Analysis of 'DeepSeek_R1.pdf' and Its Position in Current Research\",\n", + " \"content\": \"The paper titled 'DeepSeek_R1' presents a novel approach to [insert specific topic, e.g., deep learning-based molecular binding affinity prediction]. The authors propose a [describe the methodology, e.g., a convolutional neural network architecture enhanced with attention mechanisms] to improve the accuracy and efficiency of [specific application, e.g., predicting protein-ligand interactions]. 
Through extensive experiments, the paper demonstrates that DeepSeek outperforms existing models by [specific metrics, e.g., achieving a 15% increase in prediction accuracy and reducing computational time by 20%]. The study also explores the model's robustness across diverse datasets, highlighting its potential for broader applications in [related fields, e.g., drug discovery and bioinformatics].\\n\\nWhen compared to recent studies in the same domain, such as [Paper A] and [Paper B], DeepSeek offers significant improvements in [specific areas, e.g., model accuracy and computational efficiency]. While [Paper A] introduced a similar neural architecture, it lacked the [specific feature, e.g., attention mechanism] that DeepSeek utilizes to enhance performance. On the other hand, [Paper B] focused on [different approach], which, although innovative, did not achieve the same level of accuracy as DeepSeek.\\n\\nIn terms of results, DeepSeek not only matches but exceeds the performance benchmarks set by these recent studies. The comparative analysis reveals that DeepSeek's integrated approach allows for better handling of [specific challenges, e.g., high-dimensional data], making it a more robust and versatile tool in the field. Additionally, the paper's comprehensive evaluation methodology provides a clearer understanding of the model's strengths and potential areas for future improvement.\",\n", " \"source\": [\n", " {\n", - " \"title\": \"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning\",\n", + " \"title\": \"DeepSeek_R1.pdf\",\n", " \"url\": \"/Users/lion/lionagi/notebooks/data/pdf/DeepSeek_R1.pdf\"\n", " },\n", " {\n", - " \"title\": \"Experience of Training a 1.7B-Parameter LLaMa Model From Scratch\",\n", - " \"url\": \"https://arxiv.org/abs/2412.13335\"\n", - " },\n", - " {\n", - " \"title\": \"Computational Bottlenecks of Training Small-scale Large Language Models\",\n", - " \"url\": \"https://arxiv.org/abs/2410.19456\"\n", + " \"title\": \"Recent Advances in Deep Learning for Molecular Binding Affinity Prediction\",\n", + " \"url\": \"https://www.example.com/recent-advances-deep-learning\"\n", " },\n", " {\n", - " \"title\": \"BabyHGRN: Exploring RNNs for Sample-Efficient Training of Language Models\",\n", - " \"url\": \"https://arxiv.org/abs/2412.15978\"\n", + " \"title\": \"Attention Mechanisms in Neural Networks: A Comprehensive Review\",\n", + " \"url\": \"https://www.example.com/attention-mechanisms-review\"\n", " }\n", " ]\n", "}\n", diff --git a/pyproject.toml b/pyproject.toml index f9d0e7508..1fb4a92bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lionagi" -version = "0.8.6" +version = "0.8.7" description = "An Intelligence Operating System." 
authors = [ { name = "HaiyangLi", email = "quantocean.li@gmail.com" }, @@ -11,13 +11,13 @@ dependencies = [ "aiocache>=0.12.0", "aiohttp>=3.11.0", "jinja2>=3.1.0", - "litellm>=1.55.3", "pandas>=2.0.0", "pillow>=10.0.0", "pydantic>=2.0.0", "python-dotenv>=1.0.1", ] license = {file = "LICENSE"} + classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", @@ -33,7 +33,7 @@ classifiers=[ "Framework :: Pytest", ] -[dependency-groups] +[project.optional-dependencies] dev = [ "black[jupyter]>=24.10.0", "isort>=5.13.2", @@ -49,6 +49,14 @@ dev = [ ] tools = [ "docling>=2.15.1", + "rich>=13.7.0", + "PyGithub>=2.2.0", + "pytest-mock>=3.12.0", + "e2b-code-interpreter>=0.1.0", + "fuzzywuzzy>=0.18.0", + "python-Levenshtein>=0.25.0", # For better performance with fuzzywuzzy + "gitpython>=3.1.44", + "litellm>=1.55.3", ] [tool.black] @@ -72,6 +80,8 @@ python_files = ["test_*.py"] markers = [ "performance: marks tests as performance", "slow: marks tests as slow", + "asyncio: mark as async", + ] [build-system] diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tools/conftest.py b/tests/tools/conftest.py new file mode 100644 index 000000000..359cd8042 --- /dev/null +++ b/tests/tools/conftest.py @@ -0,0 +1,205 @@ +import os +import tempfile +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from lionagi.tools.reader.reader_tool import ReaderResponse, SearchResult + + +# Common fixtures +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test files.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +@pytest.fixture +def sample_text(): + """Sample text content for testing.""" + return "This is a test document.\nIt has multiple lines.\nFor testing purposes." 
+ + +@pytest.fixture +def mock_document_converter(mocker, sample_text): + """Mock DocumentConverter for reader.py""" + mock_conv = MagicMock() + mock_doc = MagicMock() + mock_doc.export_to_markdown.return_value = sample_text + mock_conv.convert.return_value.document = mock_doc + mocker.patch( + "lionagi.tools.reader.ReaderTool.DocumentConverter", + return_value=mock_conv, + ) + return mock_conv + + +@pytest.fixture +def mock_github_client(mocker): + """Mock PyGithub client for gh_.py""" + mock_client = MagicMock() + mock_user = MagicMock() + mock_repo = MagicMock() + + # Setup mock user + mock_user.get_repos.return_value = [] + mock_client.get_user.return_value = mock_user + + # Setup mock repo + mock_repo.name = "test-repo" + mock_repo.full_name = "user/test-repo" + mock_repo.private = False + mock_repo.html_url = "https://github.com/user/test-repo" + + # Setup PR operations + mock_pr = MagicMock() + mock_pr.number = 1 + mock_pr.title = "Test PR" + mock_pr.user.login = "test-user" + mock_pr.html_url = "https://github.com/user/test-repo/pull/1" + mock_repo.create_pull.return_value = mock_pr + mock_repo.get_pulls.return_value = [mock_pr] + + # Setup error cases + mock_repo_func = MagicMock() + + def get_repo(full_name): + if full_name == "invalid-url": + raise Exception("Invalid repository") + return mock_repo + + mock_repo_func.side_effect = get_repo + mock_client.get_repo = mock_repo_func + mock_client.get_user.return_value = mock_user + + mocker.patch( + "lionagi.tools.providers.gh_.GithubTool.Github", + return_value=mock_client, + ) + return mock_client + + +@pytest.fixture +def mock_git_command(mocker): + """Mock git command execution for gh_.py""" + + def mock_run(*args, **kwargs): + if "invalid-url" in args[0]: + return "Error: Invalid repository", 1 + return "Mock git output", 0 + + mocker.patch( + "lionagi.tools.providers.gh_.run_git_command", side_effect=mock_run + ) + return mock_run + + +@pytest.fixture +def mock_e2b_sandbox(mocker): + """Mock E2B sandbox for coder.py""" + mock_sandbox = MagicMock() + + # Mock run_code + mock_result = MagicMock() + mock_result.stdout = "Result: 5\n" + mock_result.stderr = "" + mock_result.error = None + mock_result.results = [] + mock_sandbox.run_code.return_value = mock_result + + # Mock commands + mock_command_result = MagicMock() + mock_command_result.exit_code = 0 + mock_command_result.stderr = None + mock_sandbox.commands.run.return_value = mock_command_result + + # Mock files + mock_sandbox.files.write.return_value = True + mock_sandbox.files.read.return_value = b"Test content" + + mock_sandbox_class = MagicMock(return_value=mock_sandbox) + mocker.patch("lionagi.tools.coder.CoderTool.Sandbox", mock_sandbox_class) + return mock_sandbox + + +@pytest.fixture +def mock_rich_console(mocker): + """Mock rich console for coder.py""" + mock_console = MagicMock() + mocker.patch( + "lionagi.tools.coder.CoderTool.Console", return_value=mock_console + ) + return mock_console + + +@pytest.fixture +def mock_tempfile(mocker, temp_dir): + """Mock tempfile operations""" + counter = 0 + + def create_temp_file(*args, **kwargs): + nonlocal counter + counter += 1 + temp_file = temp_dir / f"mock_temp_file_{counter}" + temp_file.write_text("") # Create empty file + + mock_temp = MagicMock() + mock_temp.name = str(temp_file) + return mock_temp + + mock_temp_class = MagicMock(side_effect=create_temp_file) + mocker.patch("tempfile.NamedTemporaryFile", mock_temp_class) + return mock_temp_class + + +@pytest.fixture +def mock_file_operations(mocker, temp_dir): + 
"""Mock file operations for reader.py and writer.py""" + # Store file contents in memory + file_contents = {} + + def mock_read_text(self, *args, **kwargs): + path_str = str(self) + if path_str in file_contents: + return file_contents[path_str] + if os.path.isfile(path_str): + with open(path_str, encoding="utf-8") as f: + return f.read() + return "" + + def mock_write_text(self, content, *args, **kwargs): + file_contents[str(self)] = content + # Create parent directories if they don't exist + os.makedirs(os.path.dirname(str(self)), exist_ok=True) + # Write to actual file using built-in open + with open(str(self), "w", encoding="utf-8") as f: + f.write(content) + + def mock_exists(self): + path_str = str(self) + return path_str in file_contents or os.path.isfile(path_str) + + mocker.patch.object(Path, "read_text", mock_read_text) + mocker.patch.object(Path, "write_text", mock_write_text) + mocker.patch.object(Path, "exists", mock_exists) + + # Mock chunk_content function + def mock_chunk_content(text, chunk_size, overlap, threshold): + return ["Chunk 1", "Chunk 2", "Chunk 3"] + + mocker.patch("lionagi.libs.file.chunk.chunk_content", mock_chunk_content) + + # Mock search functionality + def mock_find(text, query, start_index=0): + return text.find(query, start_index) + + mocker.patch( + "lionagi.tools.reader.ReaderTool._search_doc", + side_effect=lambda doc_id, query: ReaderResponse( + success=True, search_result=SearchResult(positions=[0, 10]) + ), + ) + + return file_contents diff --git a/tests/tools/test_coder.py b/tests/tools/test_coder.py new file mode 100644 index 000000000..23cba9e0a --- /dev/null +++ b/tests/tools/test_coder.py @@ -0,0 +1,246 @@ +from pathlib import Path + +import pytest + +from lionagi.tools.coder.coder import ( + CoderAction, + CoderRequest, + CoderTool, + RunCodeResult, +) + + +@pytest.fixture +def coder_tool(mock_e2b_sandbox, mock_rich_console, temp_dir): + """Initialize CoderTool with mocked dependencies.""" + return CoderTool(e2b_api_key="mock-key") + + +def test_sandbox_lifecycle(coder_tool, mock_e2b_sandbox): + """Test sandbox start, list, and stop operations.""" + # Start sandbox + start_request = CoderRequest(action=CoderAction.start_sandbox) + start_response = coder_tool.handle_request(start_request) + + assert start_response.success + assert start_response.sandbox_id is not None + sandbox_id = start_response.sandbox_id + + # List sandboxes + list_request = CoderRequest(action=CoderAction.list_sandboxes) + list_response = coder_tool.handle_request(list_request) + + assert list_response.success + assert list_response.sandbox_list is not None + assert sandbox_id in list_response.sandbox_list + + # Stop sandbox + stop_request = CoderRequest( + action=CoderAction.stop_sandbox, sandbox_id=sandbox_id + ) + stop_response = coder_tool.handle_request(stop_request) + + assert stop_response.success + + # Verify sandbox is stopped + list_response = coder_tool.handle_request(list_request) + assert sandbox_id not in list_response.sandbox_list + + +def test_code_execution(coder_tool, mock_e2b_sandbox): + """Test code execution in sandbox.""" + # Start sandbox + start_response = coder_tool.handle_request( + CoderRequest(action=CoderAction.start_sandbox) + ) + sandbox_id = start_response.sandbox_id + + # Execute Python code + code = """ + def add(a, b): + return a + b + result = add(2, 3) + print(f"Result: {result}") + """ + + run_request = CoderRequest( + action=CoderAction.run_code, + sandbox_id=sandbox_id, + code=code, + language="python", + ) + run_response = 
coder_tool.handle_request(run_request) + + assert run_response.success + assert run_response.run_result is not None + assert run_response.run_result.stdout is not None + assert "Result: 5" in run_response.run_result.stdout + assert not run_response.run_result.error + + +def test_package_installation(coder_tool, mock_e2b_sandbox): + """Test package installation in sandbox.""" + # Start sandbox + start_response = coder_tool.handle_request( + CoderRequest(action=CoderAction.start_sandbox) + ) + sandbox_id = start_response.sandbox_id + + # Install package with pip + install_request = CoderRequest( + action=CoderAction.install_pkg, + sandbox_id=sandbox_id, + pkg_manager="pip", + pkg_name="requests", + ) + install_response = coder_tool.handle_request(install_request) + + assert install_response.success + + # Install package with npm + install_request = CoderRequest( + action=CoderAction.install_pkg, + sandbox_id=sandbox_id, + pkg_manager="npm", + pkg_name="axios", + ) + install_response = coder_tool.handle_request(install_request) + + assert install_response.success + + +def test_file_operations(coder_tool, mock_e2b_sandbox, temp_dir): + """Test file upload and download operations.""" + # Start sandbox + start_response = coder_tool.handle_request( + CoderRequest(action=CoderAction.start_sandbox) + ) + sandbox_id = start_response.sandbox_id + + # Create test file + test_file = temp_dir / "test.txt" + test_file.write_text("Test content") + + # Upload file + upload_request = CoderRequest( + action=CoderAction.upload_file, + sandbox_id=sandbox_id, + local_path=str(test_file), + remote_path="/workspace/test.txt", + ) + upload_response = coder_tool.handle_request(upload_request) + + assert upload_response.success + + # Download file + download_path = temp_dir / "downloaded.txt" + download_request = CoderRequest( + action=CoderAction.download_file, + sandbox_id=sandbox_id, + local_path=str(download_path), + remote_path="/workspace/test.txt", + ) + download_response = coder_tool.handle_request(download_request) + + assert download_response.success + assert download_path.exists() + + +def test_shell_command(coder_tool, mock_rich_console): + """Test shell command execution.""" + request = CoderRequest( + action=CoderAction.shell_command, command="echo 'Hello, World!'" + ) + response = coder_tool.handle_request(request) + + assert response.success + assert response.command_output is not None + assert response.return_code == 0 + + +def test_error_handling(coder_tool, mock_e2b_sandbox): + """Test error handling for various scenarios.""" + # Test invalid sandbox ID + run_request = CoderRequest( + action=CoderAction.run_code, + sandbox_id="nonexistent", + code="print('test')", + ) + response = coder_tool.handle_request(run_request) + assert not response.success + assert response.error is not None + + # Test invalid package manager + install_request = CoderRequest( + action=CoderAction.install_pkg, + sandbox_id="test", + pkg_manager="invalid", + pkg_name="test", + ) + response = coder_tool.handle_request(install_request) + assert not response.success + assert response.error is not None + + # Test file operation errors + upload_request = CoderRequest( + action=CoderAction.upload_file, + sandbox_id="test", + local_path="/nonexistent/path", + remote_path="/test.txt", + ) + response = coder_tool.handle_request(upload_request) + assert not response.success + assert response.error is not None + + # Test shell command errors + command_request = CoderRequest( + action=CoderAction.shell_command, 
command="nonexistent-command" + ) + response = coder_tool.handle_request(command_request) + assert not response.success + assert response.error is not None + + +def test_sandbox_resource_options(coder_tool, mock_e2b_sandbox): + """Test sandbox creation with different resource options.""" + # Test with CPU and RAM specifications + request = CoderRequest(action=CoderAction.start_sandbox, cpu=2, ram=4096) + response = coder_tool.handle_request(request) + assert response.success + assert response.sandbox_id is not None + + # Test with custom template + request = CoderRequest( + action=CoderAction.start_sandbox, template="custom-template" + ) + response = coder_tool.handle_request(request) + assert response.success + assert response.sandbox_id is not None + + +def test_code_execution_languages(coder_tool, mock_e2b_sandbox): + """Test code execution in different languages.""" + start_response = coder_tool.handle_request( + CoderRequest(action=CoderAction.start_sandbox) + ) + sandbox_id = start_response.sandbox_id + + # Test Python + python_request = CoderRequest( + action=CoderAction.run_code, + sandbox_id=sandbox_id, + code="print('Hello from Python')", + language="python", + ) + response = coder_tool.handle_request(python_request) + assert response.success + + # Test JavaScript + js_request = CoderRequest( + action=CoderAction.run_code, + sandbox_id=sandbox_id, + code="console.log('Hello from JavaScript')", + language="javascript", + ) + response = coder_tool.handle_request(js_request) + assert response.success diff --git a/tests/tools/test_github.py b/tests/tools/test_github.py new file mode 100644 index 000000000..a0f7ed4f1 --- /dev/null +++ b/tests/tools/test_github.py @@ -0,0 +1,202 @@ +from pathlib import Path + +import pytest + +from lionagi.tools.providers.gh_.gh_tool import ( + GithubAction, + GithubRequest, + GithubTool, + PRInfo, + RepoInfo, +) + + +@pytest.fixture +def github_tool(mock_github_client, mock_git_command, temp_dir): + """Initialize GithubTool with mocked dependencies.""" + return GithubTool(github_token="mock-token") + + +def test_list_repos(github_tool, mock_github_client): + """Test listing repositories.""" + # Setup mock repos + mock_repo = ( + mock_github_client.get_user.return_value.get_repos.return_value + ) = [ + type( + "MockRepo", + (), + { + "name": "test-repo", + "full_name": "user/test-repo", + "private": False, + "html_url": "https://github.com/user/test-repo", + }, + ) + ] + + request = GithubRequest(action=GithubAction.list_repos) + response = github_tool.handle_request(request) + + assert response.success + assert response.repos is not None + assert len(response.repos) == 1 + assert response.repos[0].name == "test-repo" + assert response.repos[0].full_name == "user/test-repo" + assert not response.repos[0].private + + +def test_clone_repo(github_tool, temp_dir): + """Test cloning a repository.""" + request = GithubRequest( + action=GithubAction.clone_repo, + repo_url="https://github.com/user/test-repo.git", + local_path=str(temp_dir / "test-repo"), + ) + response = github_tool.handle_request(request) + + assert response.success + assert response.output is not None + + +def test_branch_operations(github_tool, temp_dir): + """Test branch creation and checkout.""" + repo_path = temp_dir / "test-repo" + + # Create branch + create_request = GithubRequest( + action=GithubAction.create_branch, + local_path=str(repo_path), + branch_name="feature/test", + ) + create_response = github_tool.handle_request(create_request) + assert create_response.success + + # 
Checkout branch + checkout_request = GithubRequest( + action=GithubAction.checkout_branch, + local_path=str(repo_path), + branch_name="feature/test", + ) + checkout_response = github_tool.handle_request(checkout_request) + assert checkout_response.success + + +def test_commit_push(github_tool, temp_dir): + """Test commit and push operations.""" + repo_path = temp_dir / "test-repo" + + request = GithubRequest( + action=GithubAction.commit_push, + local_path=str(repo_path), + commit_message="Test commit", + files_to_commit=["test.txt"], + ) + response = github_tool.handle_request(request) + + assert response.success + assert response.output is not None + + +def test_pull_request_operations(github_tool, mock_github_client): + """Test pull request operations.""" + # Setup mock PR + mock_pr = type( + "MockPR", + (), + { + "number": 1, + "title": "Test PR", + "user": type("MockUser", (), {"login": "test-user"}), + "html_url": "https://github.com/user/test-repo/pull/1", + }, + ) + mock_github_client.get_repo.return_value.get_pulls.return_value = [mock_pr] + + # Test opening PR + open_request = GithubRequest( + action=GithubAction.open_pull_request, + repo_url="https://github.com/user/test-repo", + branch_name="feature/test", + base_branch="main", + pr_title="Test PR", + pr_body="Test PR description", + ) + open_response = github_tool.handle_request(open_request) + assert open_response.success + + # Test listing PRs + list_request = GithubRequest( + action=GithubAction.list_prs, + repo_url="https://github.com/user/test-repo", + ) + list_response = github_tool.handle_request(list_request) + assert list_response.success + assert list_response.prs is not None + assert len(list_response.prs) == 1 + assert list_response.prs[0].number == 1 + + # Test merging PR + merge_request = GithubRequest( + action=GithubAction.merge_pr, + repo_url="https://github.com/user/test-repo", + pr_number=1, + ) + merge_response = github_tool.handle_request(merge_request) + assert merge_response.success + + +def test_error_handling(github_tool, mock_github_client): + """Test error handling for various scenarios.""" + # Test missing token + tool_no_token = GithubTool() + request = GithubRequest(action=GithubAction.list_repos) + response = tool_no_token.handle_request(request) + assert not response.success + assert response.error is not None + + # Test invalid repo URL + clone_request = GithubRequest( + action=GithubAction.clone_repo, + repo_url="invalid-url", + local_path="/tmp/test", + ) + response = github_tool.handle_request(clone_request) + assert not response.success + assert response.error is not None + + # Test missing required fields + pr_request = GithubRequest( + action=GithubAction.open_pull_request, + repo_url="https://github.com/user/test-repo", + # Missing required fields + ) + response = github_tool.handle_request(pr_request) + assert not response.success + assert response.error is not None + + # Test API errors + mock_github_client.get_user.side_effect = Exception("API Error") + request = GithubRequest(action=GithubAction.list_repos) + response = github_tool.handle_request(request) + assert not response.success + assert response.error is not None + + +def test_local_git_operations_error_handling(github_tool, mocker): + """Test error handling for local git operations.""" + # Mock git command to fail + mocker.patch( + "lionagi.tools.providers.gh_.run_git_command", + return_value=("Error output", 1), + ) + + request = GithubRequest( + action=GithubAction.create_branch, + local_path="/tmp/test", + 
branch_name="feature/test", + ) + response = github_tool.handle_request(request) + + assert not response.success + assert response.error is not None diff --git a/tests/tools/test_reader.py b/tests/tools/test_reader.py new file mode 100644 index 000000000..507bf6db3 --- /dev/null +++ b/tests/tools/test_reader.py @@ -0,0 +1,197 @@ +from pathlib import Path + +import pytest + +from lionagi.tools.reader.reader_tool import ( + ReaderAction, + ReaderRequest, + ReaderTool, +) + + +@pytest.fixture +def reader_tool(mock_document_converter, temp_dir): + """Initialize ReaderTool with mocked dependencies.""" + return ReaderTool() + + +def test_open_document( + reader_tool, temp_dir, sample_text, mock_file_operations +): + """Test opening a document.""" + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + request = ReaderRequest( + action=ReaderAction.open, path_or_url=str(test_file) + ) + + response = reader_tool.handle_request(request) + assert response.success + assert response.doc_info is not None + assert response.doc_info.doc_id.startswith("DOC_") + assert response.doc_info.length is not None + + +def test_read_document( + reader_tool, temp_dir, sample_text, mock_file_operations +): + """Test reading a document with partial offsets.""" + # First open the document + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + open_request = ReaderRequest( + action=ReaderAction.open, path_or_url=str(test_file) + ) + open_response = reader_tool.handle_request(open_request) + doc_id = open_response.doc_info.doc_id + + # Test reading with offsets + read_request = ReaderRequest( + action=ReaderAction.read, doc_id=doc_id, start_offset=5, end_offset=20 + ) + read_response = reader_tool.handle_request(read_request) + + assert read_response.success + assert read_response.chunk is not None + assert read_response.chunk.content is not None + assert read_response.chunk.start_offset == 5 + assert read_response.chunk.end_offset == 20 + + +def test_search_document( + reader_tool, temp_dir, sample_text, mock_file_operations +): + """Test searching within a document.""" + # First open the document + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + open_request = ReaderRequest( + action=ReaderAction.open, path_or_url=str(test_file) + ) + open_response = reader_tool.handle_request(open_request) + doc_id = open_response.doc_info.doc_id + + # Test searching + search_request = ReaderRequest( + action=ReaderAction.search, doc_id=doc_id, search_query="test" + ) + search_response = reader_tool.handle_request(search_request) + + assert search_response.success + assert search_response.search_result is not None + assert len(search_response.search_result.positions) > 0 + + +def test_chunk_document( + reader_tool, temp_dir, sample_text, mock_file_operations +): + """Test document chunking functionality.""" + # First open the document + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + open_request = ReaderRequest( + action=ReaderAction.open, path_or_url=str(test_file) + ) + open_response = reader_tool.handle_request(open_request) + doc_id = open_response.doc_info.doc_id + + # Test chunking + chunk_request = ReaderRequest( + action=ReaderAction.chunk_doc, + doc_id=doc_id, + chunk_size=20, + overlap=0.1, + threshold=5, + ) + chunk_response = reader_tool.handle_request(chunk_request) + + assert chunk_response.success + assert chunk_response.chunk_list is not None + assert len(chunk_response.chunk_list) > 0 + + # Verify chunk metadata + first_chunk 
= chunk_response.chunk_list[0] + assert first_chunk.index == 0 + assert first_chunk.start == 0 + assert first_chunk.end > 0 + assert first_chunk.text + + +def test_list_and_close_documents( + reader_tool, temp_dir, sample_text, mock_file_operations +): + """Test listing and closing documents.""" + # First open a document + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + open_request = ReaderRequest( + action=ReaderAction.open, path_or_url=str(test_file) + ) + open_response = reader_tool.handle_request(open_request) + doc_id = open_response.doc_info.doc_id + + # Test list_docs + list_request = ReaderRequest(action=ReaderAction.list_docs) + list_response = reader_tool.handle_request(list_request) + + assert list_response.success + assert list_response.doc_list is not None + assert doc_id in list_response.doc_list + + # Test closing + close_request = ReaderRequest(action=ReaderAction.close, doc_id=doc_id) + close_response = reader_tool.handle_request(close_request) + + assert close_response.success + + # Verify document is closed + list_response = reader_tool.handle_request(list_request) + assert doc_id not in list_response.doc_list + + +def test_error_handling(reader_tool): + """Test error handling for various scenarios.""" + # Test invalid doc_id + read_request = ReaderRequest( + action=ReaderAction.read, + doc_id="NONEXISTENT", + start_offset=0, + end_offset=10, + ) + response = reader_tool.handle_request(read_request) + assert not response.success + assert response.error is not None + + # Test missing required fields + search_request = ReaderRequest( + action=ReaderAction.search, + doc_id="NONEXISTENT", + # Missing search_query + ) + response = reader_tool.handle_request(search_request) + assert not response.success + assert response.error is not None + + # Test invalid file path + open_request = ReaderRequest( + action=ReaderAction.open, path_or_url="/nonexistent/path/file.txt" + ) + response = reader_tool.handle_request(open_request) + assert response.success # Should succeed but create empty doc + + # Test invalid chunk parameters + chunk_request = ReaderRequest( + action=ReaderAction.chunk_doc, + doc_id="NONEXISTENT", + chunk_size=-1, # Invalid size + overlap=0.1, + threshold=5, + ) + response = reader_tool.handle_request(chunk_request) + assert not response.success + assert response.error is not None diff --git a/tests/tools/test_writer.py b/tests/tools/test_writer.py new file mode 100644 index 000000000..2192b2d02 --- /dev/null +++ b/tests/tools/test_writer.py @@ -0,0 +1,260 @@ +import json +from pathlib import Path + +import pytest + +from lionagi.tools.writer.writer import ( + WriterAction, + WriterDocumentInfo, + WriterRequest, + WriterTool, +) + + +@pytest.fixture +def writer_tool(mock_tempfile, temp_dir): + """Initialize WriterTool with mocked dependencies.""" + return WriterTool(allowed_root=str(temp_dir)) + + +def test_open_document( + writer_tool, temp_dir, sample_text, mock_file_operations, mock_tempfile +): + """Test opening a document.""" + # Test opening with existing file + test_file = temp_dir / "test.txt" + test_file.write_text(sample_text) + + request = WriterRequest(action=WriterAction.open, path=str(test_file)) + response = writer_tool.handle_request(request) + + assert response.success + assert response.doc_info is not None + assert response.doc_info.doc_id.startswith("WRITER_") + assert response.doc_info.length == len(sample_text) + + # Test opening without path (new empty doc) + request = WriterRequest(action=WriterAction.open) + 
response = writer_tool.handle_request(request) + + assert response.success + assert response.doc_info is not None + assert response.doc_info.length == 0 + + +def test_write_document( + writer_tool, temp_dir, mock_file_operations, mock_tempfile +): + """Test writing to a document.""" + # First open a document + open_request = WriterRequest(action=WriterAction.open) + open_response = writer_tool.handle_request(open_request) + doc_id = open_response.doc_info.doc_id + + # Test appending + append_request = WriterRequest( + action=WriterAction.write, doc_id=doc_id, content="Hello, World!" + ) + append_response = writer_tool.handle_request(append_request) + + assert append_response.success + assert append_response.updated_length == len("Hello, World!") + + # Test writing with offsets + write_request = WriterRequest( + action=WriterAction.write, + doc_id=doc_id, + content="New", + start_offset=0, + end_offset=5, + ) + write_response = writer_tool.handle_request(write_request) + + assert write_response.success + assert write_response.updated_length is not None + + +def test_list_and_close_documents( + writer_tool, temp_dir, mock_file_operations, mock_tempfile +): + """Test listing and closing documents.""" + # Open two documents + doc1_response = writer_tool.handle_request( + WriterRequest(action=WriterAction.open) + ) + doc2_response = writer_tool.handle_request( + WriterRequest(action=WriterAction.open) + ) + + doc1_id = doc1_response.doc_info.doc_id + doc2_id = doc2_response.doc_info.doc_id + + # Test list_docs + list_request = WriterRequest(action=WriterAction.list_docs) + list_response = writer_tool.handle_request(list_request) + + assert list_response.success + assert list_response.doc_list is not None + assert doc1_id in list_response.doc_list + assert doc2_id in list_response.doc_list + + # Test closing + close_request = WriterRequest(action=WriterAction.close, doc_id=doc1_id) + close_response = writer_tool.handle_request(close_request) + + assert close_response.success + + # Verify document is closed + list_response = writer_tool.handle_request(list_request) + assert doc1_id not in list_response.doc_list + assert doc2_id in list_response.doc_list + + +def test_save_file(writer_tool, temp_dir, mock_file_operations, mock_tempfile): + """Test saving file to disk.""" + content = "Test content to save" + request = WriterRequest( + action=WriterAction.save_file, + directory=str(temp_dir), + filename="test.txt", + content=content, + ) + response = writer_tool.handle_request(request) + + assert response.success + assert response.saved_path is not None + + # Verify file was saved + saved_file = Path(response.saved_path) + assert saved_file.exists() + assert saved_file.read_text() == content + + +def test_save_chunks( + writer_tool, temp_dir, mock_file_operations, mock_tempfile +): + """Test saving chunks to disk.""" + chunks = [ + {"index": 0, "text": "Chunk 1"}, + {"index": 1, "text": "Chunk 2"}, + {"index": 2, "text": "Chunk 3"}, + ] + + request = WriterRequest( + action=WriterAction.save_chunks, + directory=str(temp_dir), + filename="chunks.json", + chunks=chunks, + ) + response = writer_tool.handle_request(request) + + assert response.success + assert response.saved_path is not None + + # Verify chunks were saved + saved_file = Path(response.saved_path) + assert saved_file.exists() + saved_chunks = json.loads(saved_file.read_text()) + assert len(saved_chunks) == 3 + assert saved_chunks[0]["text"] == "Chunk 1" + + +def test_path_restrictions( + writer_tool, temp_dir, mock_file_operations, 
mock_tempfile +): + """Test path restriction enforcement.""" + # Try to save outside allowed root + request = WriterRequest( + action=WriterAction.save_file, + directory="/tmp/outside", + filename="test.txt", + content="Test", + ) + response = writer_tool.handle_request(request) + + assert not response.success + assert response.error is not None + assert "outside allowed root" in response.error.lower() + + +def test_error_handling( + writer_tool, temp_dir, mock_file_operations, mock_tempfile +): + """Test error handling for various scenarios.""" + # Test invalid doc_id + write_request = WriterRequest( + action=WriterAction.write, doc_id="NONEXISTENT", content="test" + ) + response = writer_tool.handle_request(write_request) + assert not response.success + assert response.error is not None + + # Test missing required fields + save_request = WriterRequest( + action=WriterAction.save_file, + directory="/tmp", + # Missing filename and content + ) + response = writer_tool.handle_request(save_request) + assert not response.success + assert response.error is not None + + # Test invalid offsets + open_response = writer_tool.handle_request( + WriterRequest(action=WriterAction.open) + ) + doc_id = open_response.doc_info.doc_id + + write_request = WriterRequest( + action=WriterAction.write, + doc_id=doc_id, + content="test", + start_offset=-1, # Invalid offset + ) + response = writer_tool.handle_request(write_request) + assert response.success # Should handle invalid offset gracefully + + # Test invalid chunks + chunks_request = WriterRequest( + action=WriterAction.save_chunks, + directory="/tmp", + filename="test.json", + chunks=None, # Invalid chunks + ) + response = writer_tool.handle_request(chunks_request) + assert not response.success + assert response.error is not None + + +def test_document_persistence( + writer_tool, temp_dir, mock_file_operations, mock_tempfile +): + """Test document content persistence across operations.""" + # Open and write + open_response = writer_tool.handle_request( + WriterRequest(action=WriterAction.open) + ) + doc_id = open_response.doc_info.doc_id + + content = "Initial content" + write_response = writer_tool.handle_request( + WriterRequest( + action=WriterAction.write, doc_id=doc_id, content=content + ) + ) + assert write_response.success + + # Save to file + save_request = WriterRequest( + action=WriterAction.save_file, + directory=str(temp_dir), + filename="persistent.txt", + content=content, + ) + save_response = writer_tool.handle_request(save_request) + assert save_response.success + + # Verify content + saved_file = Path(save_response.saved_path) + assert saved_file.exists() + assert saved_file.read_text() == content diff --git a/uv.lock b/uv.lock index 7f49dce7c..bbf0c798d 100644 --- a/uv.lock +++ b/uv.lock @@ -191,19 +191,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/aa/ab0f7891a01eeb2d2e338ae8fecbe57fcebea1a24dbb64d45801bfab481d/attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308", size = 63397 }, ] -[[package]] -name = "autoflake" -version = "2.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyflakes" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2a/cb/486f912d6171bc5748c311a2984a301f4e2d054833a1da78485866c71522/autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e", size = 27642 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a2/ee/3fd29bf416eb4f1c5579cf12bf393ae954099258abd7bde03c4f9716ef6b/autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840", size = 32483 }, -] - [[package]] name = "babel" version = "2.16.0" @@ -434,6 +421,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 }, ] +[[package]] +name = "cryptography" +version = "44.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/4c/45dfa6829acffa344e3967d6006ee4ae8be57af746ae2eba1c431949b32c/cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02", size = 710657 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/09/8cc67f9b84730ad330b3b72cf867150744bf07ff113cda21a15a1c6d2c7c/cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123", size = 6541833 }, + { url = "https://files.pythonhosted.org/packages/7e/5b/3759e30a103144e29632e7cb72aec28cedc79e514b2ea8896bb17163c19b/cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092", size = 3922710 }, + { url = "https://files.pythonhosted.org/packages/5f/58/3b14bf39f1a0cfd679e753e8647ada56cddbf5acebffe7db90e184c76168/cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f", size = 4137546 }, + { url = "https://files.pythonhosted.org/packages/98/65/13d9e76ca19b0ba5603d71ac8424b5694415b348e719db277b5edc985ff5/cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb", size = 3915420 }, + { url = "https://files.pythonhosted.org/packages/b1/07/40fe09ce96b91fc9276a9ad272832ead0fddedcba87f1190372af8e3039c/cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b", size = 4154498 }, + { url = "https://files.pythonhosted.org/packages/75/ea/af65619c800ec0a7e4034207aec543acdf248d9bffba0533342d1bd435e1/cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543", size = 3932569 }, + { url = "https://files.pythonhosted.org/packages/c7/af/d1deb0c04d59612e3d5e54203159e284d3e7a6921e565bb0eeb6269bdd8a/cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e", size = 4016721 }, + { url = "https://files.pythonhosted.org/packages/bd/69/7ca326c55698d0688db867795134bdfac87136b80ef373aaa42b225d6dd5/cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e", size = 4240915 }, + { url = "https://files.pythonhosted.org/packages/ef/d4/cae11bf68c0f981e0413906c6dd03ae7fa864347ed5fac40021df1ef467c/cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053", size = 2757925 }, + { 
url = "https://files.pythonhosted.org/packages/64/b1/50d7739254d2002acae64eed4fc43b24ac0cc44bf0a0d388d1ca06ec5bb1/cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd", size = 3202055 }, + { url = "https://files.pythonhosted.org/packages/11/18/61e52a3d28fc1514a43b0ac291177acd1b4de00e9301aaf7ef867076ff8a/cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591", size = 6542801 }, + { url = "https://files.pythonhosted.org/packages/1a/07/5f165b6c65696ef75601b781a280fc3b33f1e0cd6aa5a92d9fb96c410e97/cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7", size = 3922613 }, + { url = "https://files.pythonhosted.org/packages/28/34/6b3ac1d80fc174812486561cf25194338151780f27e438526f9c64e16869/cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc", size = 4137925 }, + { url = "https://files.pythonhosted.org/packages/d0/c7/c656eb08fd22255d21bc3129625ed9cd5ee305f33752ef2278711b3fa98b/cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289", size = 3915417 }, + { url = "https://files.pythonhosted.org/packages/ef/82/72403624f197af0db6bac4e58153bc9ac0e6020e57234115db9596eee85d/cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7", size = 4155160 }, + { url = "https://files.pythonhosted.org/packages/a2/cd/2f3c440913d4329ade49b146d74f2e9766422e1732613f57097fea61f344/cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c", size = 3932331 }, + { url = "https://files.pythonhosted.org/packages/7f/df/8be88797f0a1cca6e255189a57bb49237402b1880d6e8721690c5603ac23/cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64", size = 4017372 }, + { url = "https://files.pythonhosted.org/packages/af/36/5ccc376f025a834e72b8e52e18746b927f34e4520487098e283a719c205e/cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285", size = 4239657 }, + { url = "https://files.pythonhosted.org/packages/46/b0/f4f7d0d0bcfbc8dd6296c1449be326d04217c57afb8b2594f017eed95533/cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417", size = 2758672 }, + { url = "https://files.pythonhosted.org/packages/97/9b/443270b9210f13f6ef240eff73fd32e02d381e7103969dc66ce8e89ee901/cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede", size = 3202071 }, + { url = "https://files.pythonhosted.org/packages/77/d4/fea74422326388bbac0c37b7489a0fcb1681a698c3b875959430ba550daa/cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731", size = 3338857 }, + { url = "https://files.pythonhosted.org/packages/1a/aa/ba8a7467c206cb7b62f09b4168da541b5109838627f582843bbbe0235e8e/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4", size = 3850615 }, + { url = "https://files.pythonhosted.org/packages/89/fa/b160e10a64cc395d090105be14f399b94e617c879efd401188ce0fea39ee/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756", size = 4081622 }, + { url = "https://files.pythonhosted.org/packages/47/8f/20ff0656bb0cf7af26ec1d01f780c5cfbaa7666736063378c5f48558b515/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c", size = 3867546 }, + { url = "https://files.pythonhosted.org/packages/38/d9/28edf32ee2fcdca587146bcde90102a7319b2f2c690edfa627e46d586050/cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa", size = 4090937 }, + { url = "https://files.pythonhosted.org/packages/cc/9d/37e5da7519de7b0b070a3fedd4230fe76d50d2a21403e0f2153d70ac4163/cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c", size = 3128774 }, +] + [[package]] name = "debugpy" version = "1.8.12" @@ -500,6 +524,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/cd/e6507d924aa69e9647f917ed671e2d62e19e41d4f120a15fcbb583661667/deepsearch_glm-1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2315cc4ffe7032dada294a0cd72a47dbc6c0121fd07d4b5719f9a9e9519d091", size = 14644989 }, ] +[[package]] +name = "deprecated" +version = "1.2.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/a3/53e7d78a6850ffdd394d7048a31a6f14e44900adedf190f9a165f6b69439/deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d", size = 2977612 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/8f/c7f227eb42cfeaddce3eb0c96c60cbca37797fa7b34f8e1aeadf6c5c0983/Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320", size = 9941 }, +] + [[package]] name = "dill" version = "0.3.9" @@ -537,7 +573,7 @@ dependencies = [ { name = "deepsearch-glm" }, { name = "docling-core", extra = ["chunking"] }, { name = "docling-ibm-models", version = "3.1.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'darwin') or python_full_version >= '3.11'" }, - { name = "docling-ibm-models", version = "3.1.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')" }, + { name = "docling-ibm-models", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')" }, { name = "docling-parse" }, { name = "easyocr" }, { name = "filetype" }, @@ -563,7 +599,7 @@ wheels = [ [[package]] name = "docling-core" -version = "2.14.0" +version = "2.15.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonref" }, @@ -576,9 +612,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/21/25/da36fc7001b54b1fab585fff7641e9f237e9edab786236e705c56eed4af9/docling_core-2.14.0.tar.gz", hash = "sha256:0eb6a52e05f2a06e1777b0533d655a87b54a1a5d374b957beb244c8940aed7da", size = 71271 } +sdist = { url = "https://files.pythonhosted.org/packages/52/61/9204dc91cb1b4def7c0bc098881c10e6c9a804580b9e74fe7c4fda7dbb63/docling_core-2.15.1.tar.gz", hash = "sha256:588d941b5bfc393a79e779ab64819c60763e7f182ec5221ee37da4be91dd802f", size = 72028 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/c1/9b1de2b9b7b6c5b38aca263cb839155e33d39c66175a9b8acf6fc1fab043/docling_core-2.14.0-py3-none-any.whl", hash = "sha256:05a7b89872260dcdba2b0fbcc3a4619aed4846f58f155d33a10b41b23eea5188", size = 91387 }, + { url = "https://files.pythonhosted.org/packages/33/69/9a8ae199dd2a1a27439ed6fe5785d2d6a557da9ee4e6236d9d6bc77bb07b/docling_core-2.15.1-py3-none-any.whl", hash = "sha256:33152604e1f14d5caccbef099c73163c3f211d0b4d92403d262c308633cc0451", size = 92022 }, ] [package.optional-dependencies] @@ -620,7 +656,7 @@ wheels = [ [[package]] name = "docling-ibm-models" -version = "3.1.2" +version = "3.2.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'x86_64' and platform_system == 'Darwin') or (python_full_version < '3.11' and platform_system == 'Darwin' and sys_platform != 'darwin')", @@ -639,46 +675,45 @@ dependencies = [ { name = "tqdm", marker = "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')" }, { name = "transformers", marker = "(python_full_version < '3.11' and platform_machine != 'x86_64') or (python_full_version < '3.11' and sys_platform != 'darwin')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/c2/f1a8fd16deaff8d1438b4544f02d11a1e10a576299b70ccc10b51e509ed0/docling_ibm_models-3.1.2.tar.gz", hash = "sha256:68c8b8f1cb87a8d8c5c6d6fe2c86679d65a09d354ec413ed13e9493ee0cd3794", size = 58789 } +sdist = { url = "https://files.pythonhosted.org/packages/21/0b/c9d6f2d3c4406ea55d118bbf28eede9d21c874ae4f07f1c2bfd5f20b2785/docling_ibm_models-3.2.1.tar.gz", hash = "sha256:abd1bdc58f00600065eedbfbd34876704d5004cd20884a2c0a61ca2ee5a927dd", size = 67055 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/fb/9a5c168c74d5c8c68361f91df9cacfe22e8104d6a2f404541d01588166e3/docling_ibm_models-3.1.2-py3-none-any.whl", hash = "sha256:c5d2fa83db08ec538bb77e3d5d79c9ccef7b6873aab19ddcf5bb5e9801bf4a03", size = 65987 }, + { url = "https://files.pythonhosted.org/packages/ed/67/c281252738e1f05548f3fd42da6d7be5bb015d1f31369a6fa3ae32d2b8fc/docling_ibm_models-3.2.1-py3-none-any.whl", hash = "sha256:55bca5673381cc5862f4de584345020d071414c46bc1b9f6436d674e3610ec97", size = 76499 }, ] [[package]] name = "docling-parse" -version = "3.1.0" +version = "3.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "autoflake" }, { name = "docling-core" }, { name = "pillow" }, { name = "pydantic" }, { name = "pywin32", marker = "sys_platform == 'win32'" }, { name = "tabulate" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7c/e1/3bd2c73e49cb965c40d04baf09e43de98db709eaa5907ec240133078802d/docling_parse-3.1.0.tar.gz", hash = "sha256:4a159222c7c4b4b7932330168405ef2b887bc5e3d217c9dd25a9d9b51f9187d3", size = 34652148 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d2/27/a3e174fbf7e603464f75646eef68dc471867a6ae008b1ed0a8da1868f2a6/docling_parse-3.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:0bfd425f2a66a23e7dce51aa461984b69cbb6cf2d7e948c1388314b5a6089045", size = 22044259 }, - { url = "https://files.pythonhosted.org/packages/38/7e/2ec1585b12753714dd5465f9b8ad820140dc24f9be0c5e9a7c2c75ac5b3f/docling_parse-3.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:d1c8c689915f8d768fc850fd3e98d9e6e3d50fb070cab84fddac2465c3bc78bc", size = 21925899 }, - { url = "https://files.pythonhosted.org/packages/7c/8e/7ac280d0cf41c3ce8c7e727eccd8d18d7a0efc9deaf07a85e02fc2a8af9d/docling_parse-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c24570fcc9e820b86de4996e8c8fa76d490ae8b16c9dc264a2d60f4aaf5801", size = 22375255 }, - { url = "https://files.pythonhosted.org/packages/b5/70/c0d857d08ec228c6ed678cb288787d344bc9226d72202fcf5cabf3ab0561/docling_parse-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9e05e744f6ffbfe6e65b53a0a6bdc440814b2581edf1dbab26aca38b44514a5", size = 22442771 }, - { url = "https://files.pythonhosted.org/packages/0f/39/7f68a4358fc2369f678b39eec0f159bacfdc691a92e66d0d4e37f200fa1b/docling_parse-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59507c9a9672032b3029df7318ab017bf66986f95e00edc5d2e4c5c6ec5fa7e", size = 23231228 }, - { url = "https://files.pythonhosted.org/packages/b8/ec/db1571e944a1f5f80a34995d6bd666121d15ccfd542dff0c1b7e78f54b85/docling_parse-3.1.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:428098cc5f8a449101099f77e53862a1e8db58e27a028a2a9ce397be26d4788f", size = 22045341 }, - { url = "https://files.pythonhosted.org/packages/c5/03/666897fd8396270cbb8b15b420e7f42d8471dbccda241b974faa6005c5ac/docling_parse-3.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:5f877b9d5ac46828327e5bd94da7b2f54da900d4a729c52533df904dd3463f85", size = 21927170 }, - { url = "https://files.pythonhosted.org/packages/20/65/6c196b24853c4e23530aa15bc6599f4cdc2e535b6ea15ae11f4e8652a096/docling_parse-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c10e587ca2c55522f7cf35b8d9983f1b2eeef6f67b168da157ad4fc1e3ad5ba2", size = 22376956 }, - { url = "https://files.pythonhosted.org/packages/ee/10/6c6228525b7f0c655060efdfdc5783c7ecb8ff63c076dda7162c48fe3b89/docling_parse-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2da249c455758f422d47379202dc599a3f35fe811ed03dc019fd42548d8cd2b", size = 22444772 }, - { url = "https://files.pythonhosted.org/packages/52/83/ee33dd82d96c46329679b7fc743814db545b3a808a5e28913ebff3971461/docling_parse-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:2aeaa1a34f92208ed63784ee04f69644f32974b520d957db753256c6d0ccf2b6", size = 23233083 }, - { url = "https://files.pythonhosted.org/packages/eb/ba/4615db3ef1e4f54de5c3c4e977eb19056f25da379f2a1b3acd6f092361fb/docling_parse-3.1.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:6997eb40f7d04e4438e3a5218df23c07b260cbece3f150cdd428bf05f927a389", size = 22045765 }, - { url = "https://files.pythonhosted.org/packages/26/bd/28dd6dc99760e60e64aab2f7f85603ad284a34bcf96aa92ace40bd0ced8f/docling_parse-3.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1307169244cd145c59c2c8137bb9c0c132dc2bb0abf890875e8ca22f09fa4f99", size = 21927136 }, - { url = 
"https://files.pythonhosted.org/packages/dc/76/386ce489b687c69915a932b4f21bed921bbe6a3837f93fe3218536130ec8/docling_parse-3.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd1d4460303d1d7162c0f45f141bef6bfb39df235c7b7e9ba842a921c74e3176", size = 22375326 }, - { url = "https://files.pythonhosted.org/packages/13/7c/07af8772822ba7bbfc5cd1113198ed4bfb66fce4a8932970f86d75ea63d0/docling_parse-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:970a44d2d79a007957e91b2501c0572e2d0f4623c3f3685ba7a8b09bf39df1b1", size = 22442885 }, - { url = "https://files.pythonhosted.org/packages/db/76/ed75396653ab90da824f32167fd2efffdd1435be585dfd58b1682624202e/docling_parse-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:b9a80f4b4161c57797f061fa111ce797eeacf52bce9bebba7919a8ea7fcc26ac", size = 23233077 }, - { url = "https://files.pythonhosted.org/packages/cc/dc/2c30205f3a251203f61efebbf25aad3caecb9cee9856d5a09b702187081a/docling_parse-3.1.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:993e27c953bb0fa53100aea694442c0147f523044c02fd7c193b68ac9070d507", size = 22045843 }, - { url = "https://files.pythonhosted.org/packages/c6/7c/5ea51f2ee5fcb316f04ad2b5e20f8583d3165776ca6511548379b28a0f85/docling_parse-3.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0a19d828c466c653ffdaff1d7da2cd7f4aab212360c477629463da8f0e150c05", size = 21927221 }, - { url = "https://files.pythonhosted.org/packages/85/cd/cc837743a01838e9597f4047a62988e77c4f48df9f87675f9efb70ca2c02/docling_parse-3.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd868b8ee626b9e74c8d2bb3ad8e7f036dd6839250bdebe7a4c0a1657aaa8dbd", size = 22375838 }, - { url = "https://files.pythonhosted.org/packages/72/65/5d64fb329f2741b46f58029f6ab058d9b8f1668af5d464e01149e64a0115/docling_parse-3.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c1627a7c16527bf2003d764a5f8aa23337d8c56085d3d4717e65d2c940b242", size = 22443769 }, - { url = "https://files.pythonhosted.org/packages/31/41/419f9343d37ffa4ba18084c70951bb60f3453be52443587149fa2d45bf4d/docling_parse-3.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:67175719913fa3adb6476a0753383b4a0d156e286927ae641085ff99f85320c4", size = 23232285 }, - { url = "https://files.pythonhosted.org/packages/28/7b/99d646f4760218a75ac326c6973d458025262376e0cae0fbdc30ff9da4b7/docling_parse-3.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d83d07744e52f1b4cfaf50f21c47b2d0df6d285a7e3d4297e09ba159b36b9277", size = 25014169 }, +sdist = { url = "https://files.pythonhosted.org/packages/bb/8d/c8e5c8c903180dd05c797382933be2855184b7715eeff22cfc13c5d729ff/docling_parse-3.1.1.tar.gz", hash = "sha256:fb62c85132d35edd91cee5c093b9e45d981ca7fa8ba0c560f0c3ce56993e4f8e", size = 34652565 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/b0/d9c53da2a50e08fac123223b677f23aebc4474baade748126c1c457e304c/docling_parse-3.1.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:cccf1b7912ece508f75bc004dff392298fc956f33c62f3a48db6c0a7976d808a", size = 22043978 }, + { url = "https://files.pythonhosted.org/packages/e0/9c/470a4729272ae642e62660cd8e857b03169d1e24d715caf8201f0ab866dc/docling_parse-3.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3537f3bbd2152a3f2c25142ac93b9db08e68eca923863dca272a0f588739855d", size = 21925616 }, + { url = 
"https://files.pythonhosted.org/packages/54/f1/b53f8560b4c151a1260956bfbfa7adfb6b6d5b13307917e3d8ff3ff96b88/docling_parse-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dffee503cf6be3343df2d9421067585a88543ed5c94e39662a3cfc5cd2b794a", size = 22374965 }, + { url = "https://files.pythonhosted.org/packages/c5/2c/bc8679107f5103b58bcf8ca19f9f99ed700bbd6c487ed0f6844dd84f4133/docling_parse-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c856f0dbe8f10296442f749109d5d5dc86f10151a0e51e8629b32d053d0e61c2", size = 22442479 }, + { url = "https://files.pythonhosted.org/packages/bc/14/58543a816749b882ec1c02b3b4070eea9287cd4aa5ce32eab052b602093b/docling_parse-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5693ef8cba6096d8ed1039f61a663ea74bac711d06616b51254c495ebb3eb53d", size = 23230944 }, + { url = "https://files.pythonhosted.org/packages/13/c8/625d87b64abcad2478b66ff12176be1a7b2193e1a797d80759ad7b28c84e/docling_parse-3.1.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:8e586bcd24d7e3ba26ebdace4cb15d70498fb4656fc9f6f20f24b007de0628d6", size = 22045058 }, + { url = "https://files.pythonhosted.org/packages/71/82/88743a41c72b94354af22b7f3163e03002e097aa8aa554e5ede0db366159/docling_parse-3.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1e8137020ed9bff26eb70dbbdb42c62f3e87c81001e3ecd41e39b3ec3631d7bf", size = 21926887 }, + { url = "https://files.pythonhosted.org/packages/cc/bf/8a8d35dc7c099bb2b9797a1ded2b7c0a61223816915109932cf6be34a7c9/docling_parse-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bec98497626202a6fa7e2a715814414131b53b32cd2999e540edf87a60e45ef5", size = 22376668 }, + { url = "https://files.pythonhosted.org/packages/c3/2a/175d760b523e25d766ef48cd4c57f5c92e46cd785e5c252720ea305c3595/docling_parse-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:537e0eec387a9cc3e35492752efc561982b3cf02b3d571ca46c4a0af3a884068", size = 22444485 }, + { url = "https://files.pythonhosted.org/packages/95/2d/680eb1a8af173f7e4cf445c3884b4a7f6fc2eaa6824afed2b47f18413f5a/docling_parse-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:ca3d45a0e9cd41c5e6e0002eaa1a3478bc065b58dc7d38a114eb5ad37f762934", size = 23232801 }, + { url = "https://files.pythonhosted.org/packages/8a/73/6a1b70d2e99e638b2648c2ace836d665b7c1c525188af9e5c3140f70b6ff/docling_parse-3.1.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:008d751f4fdd82a3cbe3e8d4abaa4d5cf0d0cb35d16334c5dfc22a62001c780b", size = 22045481 }, + { url = "https://files.pythonhosted.org/packages/22/77/4726236ac1b64c3e2924e75205f9342a5217739509774bbcd0abcfa6e931/docling_parse-3.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a06a0e4b403387e9c4e79d388aa63ace75d1aa855018238634ec8ce262369ffa", size = 21926854 }, + { url = "https://files.pythonhosted.org/packages/47/86/ff2c069bb97cc7cf5db7b699ae4fdd5015d7dff3a068e24caf26a5df689e/docling_parse-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b802ae9c2464fc0354721d0ef3c73f573c202fa1995276afceaf5882bb894583", size = 22375038 }, + { url = "https://files.pythonhosted.org/packages/78/f5/4d3b6e02e5c6999300a428ccb2904b8603f3bb080ea327ba730ee3bd8645/docling_parse-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dc796b738e4ba3663084ee9fa4fe749e8aa27154bf459a3531e5a5b9c774b6b", size = 22442592 }, + { url = 
"https://files.pythonhosted.org/packages/2f/ac/585271a7bebb3990ed1537c54e4040a945162151513a87f0855a1ffaabcc/docling_parse-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:dbad418bedc7706c230ae8212cd08a41400762104be3df512ffe05d0f468d6e2", size = 23232794 }, + { url = "https://files.pythonhosted.org/packages/16/ff/3be6ef2ec5fbbb8b3db64c86be78eebb18e56c228991b3aa914da061d2e9/docling_parse-3.1.1-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:b32f46810f7c05de3e1fd13c2bbe58291710b90777baefefd8ed04118be319db", size = 22045562 }, + { url = "https://files.pythonhosted.org/packages/d7/ed/15d389d8780be16cffec0f7732dd41e72d0367ca27a91154669cc8da889b/docling_parse-3.1.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:10ffbfe70a0eda2cac42a8fb2ebbe0adafdcfeb173ecaa0e7e0e7769cc020449", size = 21926940 }, + { url = "https://files.pythonhosted.org/packages/23/cd/9bb3295f291aa43f002d51fafa29886cec2107eec1942d6c9a2250890749/docling_parse-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4389f552297c0798bfc9b4b0116461d7e154340311b143264e9e48808f19884", size = 22375549 }, + { url = "https://files.pythonhosted.org/packages/15/ff/f4d80359f8522f5561b8c25d7a74549c79b6fb9184bd72a9c66577654874/docling_parse-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a716412318f5136fde397925a06b3d1cc3fce33f060175574d09576cbfc901f1", size = 22443476 }, + { url = "https://files.pythonhosted.org/packages/62/86/22942662a33f4ad03c971274c338337241891286dc3e580af46c95788fd1/docling_parse-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:7ff36910971bc015270c4aaae5f01d783970a0af840ca84070a41564759048c5", size = 23232000 }, + { url = "https://files.pythonhosted.org/packages/30/2c/7310fd0974389a62a1c27e4fd9907966dd7cde27020ecac5ffa75b27d2ad/docling_parse-3.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:97bbd6b45681c643d1ca2917d4c6813735a3527ee2af2823ebdf3882545539bf", size = 25013883 }, ] [[package]] @@ -690,6 +725,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] +[[package]] +name = "e2b" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "httpcore" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/28/c05fe7a49005e2e98017941c05df15e2b096e8d57c1abcf2fca05e11abef/e2b-1.0.5.tar.gz", hash = "sha256:43c82705af7b7d4415c2510ff77dab4dc075351e0b769d6adf8e0d7bb4868d13", size = 44374 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/80/35a7050f011f603599ce3d579fe3a5f424c9256574e132f4b75260d9ffb5/e2b-1.0.5-py3-none-any.whl", hash = "sha256:a71bdec46f33d3e38e87d475d7fd2939bd7b6b753b819c9639ca211cd375b79e", size = 81717 }, +] + +[[package]] +name = "e2b-code-interpreter" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "e2b" }, + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/0e/95be4c53ee2fa6879d517ce7bca1656249e6bcdc377422b19ad636c59748/e2b_code_interpreter-1.0.4.tar.gz", hash = "sha256:fec5651d98ca0d03dd038c5df943a0beaeb59c6d422112356f55f2b662d8dea1", size = 9273 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/92/99/ce5c3953db2818976a640bce5af2dbc347fae0b00b105728b6f110a696e5/e2b_code_interpreter-1.0.4-py3-none-any.whl", hash = "sha256:e8cea4946b3457072a524250aee712f7f8d44834b91cd9c13da3bdf96eda1a6e", size = 12062 }, +] + [[package]] name = "easyocr" version = "1.7.2" @@ -732,20 +799,20 @@ wheels = [ [[package]] name = "executing" -version = "2.1.0" +version = "2.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 }, ] [[package]] name = "filelock" -version = "3.16.1" +version = "3.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037 } +sdist = { url = "https://files.pythonhosted.org/packages/dc/9c/0b15fb47b464e1b663b1acd1253a062aa5feecb07d4e597daea542ebd2b5/filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e", size = 18027 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, + { url = "https://files.pythonhosted.org/packages/89/ec/00d68c4ddfedfe64159999e5f8a98fb8442729a63e2077eb9dcd89623d27/filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338", size = 16164 }, ] [[package]] @@ -850,6 +917,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/48/e791a7ed487dbb9729ef32bb5d1af16693d8925f4366befef54119b2e576/furo-2024.8.6-py3-none-any.whl", hash = "sha256:6cd97c58b47813d3619e63e9081169880fbe331f0ca883c871ff1f3f11814f5c", size = 341333 }, ] +[[package]] +name = "fuzzywuzzy" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/4b/0a002eea91be6048a2b5d53c5f1b4dafd57ba2e36eea961d05086d7c28ce/fuzzywuzzy-0.18.0.tar.gz", hash = "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", size = 28888 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl", hash = "sha256:928244b28db720d1e0ee7587acf660ea49d7e4c632569cad4f1cd7e68a5f0993", size = 18272 }, +] + +[[package]] +name = "gitdb" 
+version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "h11" version = "0.14.0" @@ -1220,22 +1320,98 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097 }, ] +[[package]] +name = "levenshtein" +version = "0.26.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "rapidfuzz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/e6/79807d3b59a67dd78bb77072ca6a28d8db0935161fecf935e6c38c5f6825/levenshtein-0.26.1.tar.gz", hash = "sha256:0d19ba22330d50609b2349021ec3cf7d905c6fe21195a2d0d876a146e7ed2575", size = 374307 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/ae/af5f9e9f06052719df6af46d7a7fee3675fd2dea0e2845cc0f4968cf853f/levenshtein-0.26.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8dc4a4aecad538d944a1264c12769c99e3c0bf8e741fc5e454cc954913befb2e", size = 177032 }, + { url = "https://files.pythonhosted.org/packages/bb/a6/be36c1d43cccd032b359ba2fa66dd299bac0cd226f263672332738535553/levenshtein-0.26.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec108f368c12b25787c8b1a4537a1452bc53861c3ee4abc810cc74098278edcd", size = 157539 }, + { url = "https://files.pythonhosted.org/packages/d1/76/13df26b47c53db1cf01c40bae1483b13919d6eab12cede3b93b018927229/levenshtein-0.26.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69229d651c97ed5b55b7ce92481ed00635cdbb80fbfb282a22636e6945dc52d5", size = 153298 }, + { url = "https://files.pythonhosted.org/packages/f2/d9/c02fd7ec98d55df51c643d0475b859fab19a974eb44e5ca72f642dbfeffd/levenshtein-0.26.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79dcd157046d62482a7719b08ba9e3ce9ed3fc5b015af8ea989c734c702aedd4", size = 186766 }, + { url = "https://files.pythonhosted.org/packages/7a/71/44adaafadc5c93845048b88426ab5e2a8414efce7026478cad115fd08f92/levenshtein-0.26.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f53f9173ae21b650b4ed8aef1d0ad0c37821f367c221a982f4d2922b3044e0d", size = 187546 }, + { url = 
"https://files.pythonhosted.org/packages/2d/7e/24593d50e9e0911c96631a123760b96d1dabbcf1fc55a300648d4f0240dd/levenshtein-0.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3956f3c5c229257dbeabe0b6aacd2c083ebcc1e335842a6ff2217fe6cc03b6b", size = 162601 }, + { url = "https://files.pythonhosted.org/packages/54/98/2285860f07c519af3bb1af29cc4a51c3fd8c028836887615c776f6bb28d4/levenshtein-0.26.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1e83af732726987d2c4cd736f415dae8b966ba17b7a2239c8b7ffe70bfb5543", size = 249164 }, + { url = "https://files.pythonhosted.org/packages/28/f7/87008ca57377f2f296a3b9b87b46fa80a4a471c1d3de3ea4ff37acc65b5a/levenshtein-0.26.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4f052c55046c2a9c9b5f742f39e02fa6e8db8039048b8c1c9e9fdd27c8a240a1", size = 1077613 }, + { url = "https://files.pythonhosted.org/packages/7d/ca/5f2b3c4b181f4e97805ee839c47cb99c8048bf7934358af8c3d6a07fb6c2/levenshtein-0.26.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9895b3a98f6709e293615fde0dcd1bb0982364278fa2072361a1a31b3e388b7a", size = 1331030 }, + { url = "https://files.pythonhosted.org/packages/b3/f4/de5a779d178e489906fd39d7b2bdb782f80a98affc57e9d40a723b9ee89c/levenshtein-0.26.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a3777de1d8bfca054465229beed23994f926311ce666f5a392c8859bb2722f16", size = 1207001 }, + { url = "https://files.pythonhosted.org/packages/f8/61/78b25ef514a23735ae0baf230af668f16d6f5e1466c4db72a4de0e233768/levenshtein-0.26.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:81c57e1135c38c5e6e3675b5e2077d8a8d3be32bf0a46c57276c092b1dffc697", size = 1355999 }, + { url = "https://files.pythonhosted.org/packages/b9/e8/a488dbb99726e08ac05ad3359e7db79e35c2c4e4bafbaaf081ae140c7de3/levenshtein-0.26.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:91d5e7d984891df3eff7ea9fec8cf06fdfacc03cd074fd1a410435706f73b079", size = 1135174 }, + { url = "https://files.pythonhosted.org/packages/52/c1/79693b33ab4c5ba04df8b4d116c2ae4cfaa71e08b2cf2b8cd93d5fa37b07/levenshtein-0.26.1-cp310-cp310-win32.whl", hash = "sha256:f48abff54054b4142ad03b323e80aa89b1d15cabc48ff49eb7a6ff7621829a56", size = 87111 }, + { url = "https://files.pythonhosted.org/packages/e6/ed/5250c0891f6a99e41e715ce379b77863d66356eae7519e3626514f2729b6/levenshtein-0.26.1-cp310-cp310-win_amd64.whl", hash = "sha256:79dd6ad799784ea7b23edd56e3bf94b3ca866c4c6dee845658ee75bb4aefdabf", size = 98062 }, + { url = "https://files.pythonhosted.org/packages/4f/b3/58f69cbd9f21fe7ec54a71059b3e8fdb37c43781b31a36f49c973bd387c5/levenshtein-0.26.1-cp310-cp310-win_arm64.whl", hash = "sha256:3351ddb105ef010cc2ce474894c5d213c83dddb7abb96400beaa4926b0b745bd", size = 87976 }, + { url = "https://files.pythonhosted.org/packages/af/b4/86e447173ca8d936b7ef270d21952a0053e799040e73b843a4a5ac9a15a1/levenshtein-0.26.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:44c51f5d33b3cfb9db518b36f1288437a509edd82da94c4400f6a681758e0cb6", size = 177037 }, + { url = "https://files.pythonhosted.org/packages/27/b3/e15e14e5836dfc23ed014c21b307cbf77b3c6fd75e11d0675ce9a0d43b31/levenshtein-0.26.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56b93203e725f9df660e2afe3d26ba07d71871b6d6e05b8b767e688e23dfb076", size = 157478 }, + { url = "https://files.pythonhosted.org/packages/32/f1/f4d0904c5074e4e9d33dcaf304144e02eae9eec9d61b63bf17b1108ce228/levenshtein-0.26.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:270d36c5da04a0d89990660aea8542227cbd8f5bc34e9fdfadd34916ff904520", size = 153873 }, + { url = "https://files.pythonhosted.org/packages/f9/0d/cd5abe809421ce0d4a2cae60fd2fdf62cb43890068515a8a0069e2b17894/levenshtein-0.26.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:480674c05077eeb0b0f748546d4fcbb386d7c737f9fff0010400da3e8b552942", size = 186850 }, + { url = "https://files.pythonhosted.org/packages/a8/69/03f4266ad83781f2602b1976a2e5a98785c148f9bfc77c343e5aa1840f64/levenshtein-0.26.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13946e37323728695ba7a22f3345c2e907d23f4600bc700bf9b4352fb0c72a48", size = 187527 }, + { url = "https://files.pythonhosted.org/packages/36/fa/ec3be1162b1a757f80e713220470fe5b4db22e23f886f50ac59a48f0a84d/levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceb673f572d1d0dc9b1cd75792bb8bad2ae8eb78a7c6721e23a3867d318cb6f2", size = 162673 }, + { url = "https://files.pythonhosted.org/packages/9e/d6/dc8358b6a4174f413532aa27463dc4d167ac25742826f58916bb6e6417b1/levenshtein-0.26.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42d6fa242e3b310ce6bfd5af0c83e65ef10b608b885b3bb69863c01fb2fcff98", size = 250413 }, + { url = "https://files.pythonhosted.org/packages/57/5e/a87bf39686482a1df000fdc265fdd812f0cd316d5fb0a25f52654504a82b/levenshtein-0.26.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b8b68295808893a81e0a1dbc2274c30dd90880f14d23078e8eb4325ee615fc68", size = 1078713 }, + { url = "https://files.pythonhosted.org/packages/c5/04/30ab2f27c4ff7d6d98b3bb6bf8541521535ad2d05e50ac8fd00ab701c080/levenshtein-0.26.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b01061d377d1944eb67bc40bef5d4d2f762c6ab01598efd9297ce5d0047eb1b5", size = 1331174 }, + { url = "https://files.pythonhosted.org/packages/e4/68/9c7f60ccb097a86420d058dcc3f575e6b3d663b3a5cde3651443f7087e14/levenshtein-0.26.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9d12c8390f156745e533d01b30773b9753e41d8bbf8bf9dac4b97628cdf16314", size = 1207733 }, + { url = "https://files.pythonhosted.org/packages/64/21/222f54a1a654eca1c1cd015d32d972d70529eb218d469d516f13eac2149d/levenshtein-0.26.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:48825c9f967f922061329d1481b70e9fee937fc68322d6979bc623f69f75bc91", size = 1356116 }, + { url = "https://files.pythonhosted.org/packages/6f/65/681dced2fa798ea7882bff5682ab566689a4920006ed9aca4fd8d1edb2d2/levenshtein-0.26.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d8ec137170b95736842f99c0e7a9fd8f5641d0c1b63b08ce027198545d983e2b", size = 1135459 }, + { url = "https://files.pythonhosted.org/packages/a1/e8/1ff8a634c428ed908d20482f77491cca08fa16c96738ad82d9219da138a1/levenshtein-0.26.1-cp311-cp311-win32.whl", hash = "sha256:798f2b525a2e90562f1ba9da21010dde0d73730e277acaa5c52d2a6364fd3e2a", size = 87265 }, + { url = "https://files.pythonhosted.org/packages/8f/fb/44e9747558a7381ea6736e10ac2f871414007915afb94efac423e68cf441/levenshtein-0.26.1-cp311-cp311-win_amd64.whl", hash = "sha256:55b1024516c59df55f1cf1a8651659a568f2c5929d863d3da1ce8893753153bd", size = 98518 }, + { url = "https://files.pythonhosted.org/packages/04/90/c476a74d8ec25d680b9cbf51966d638623a82a2fd4e99b988a383f22a681/levenshtein-0.26.1-cp311-cp311-win_arm64.whl", hash = "sha256:e52575cbc6b9764ea138a6f82d73d3b1bc685fe62e207ff46a963d4c773799f6", size = 88086 }, + { url = 
"https://files.pythonhosted.org/packages/4c/53/3685ee7fbe9b8eb4b82d8045255e59dd6943f94e8091697ef3808e7ecf63/levenshtein-0.26.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cc741ca406d3704dc331a69c04b061fc952509a069b79cab8287413f434684bd", size = 176447 }, + { url = "https://files.pythonhosted.org/packages/82/7f/7d6fe9b76bd030200f8f9b162f3de862d597804d292af292ec3ce9ae8bee/levenshtein-0.26.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:821ace3b4e1c2e02b43cf5dc61aac2ea43bdb39837ac890919c225a2c3f2fea4", size = 157589 }, + { url = "https://files.pythonhosted.org/packages/bc/d3/44539e952df93c5d88a95a0edff34af38e4f87330a76e8335bfe2c0f31bf/levenshtein-0.26.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92694c9396f55d4c91087efacf81297bef152893806fc54c289fc0254b45384", size = 153306 }, + { url = "https://files.pythonhosted.org/packages/ba/fe/21443c0c50824314e2d2ce7e1e9cd11d21b3643f3c14da156b15b4d399c7/levenshtein-0.26.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:51ba374de7a1797d04a14a4f0ad3602d2d71fef4206bb20a6baaa6b6a502da58", size = 184409 }, + { url = "https://files.pythonhosted.org/packages/f0/7b/c95066c64bb18628cf7488e0dd6aec2b7cbda307d93ba9ede68a21af2a7b/levenshtein-0.26.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7aa5c3327dda4ef952769bacec09c09ff5bf426e07fdc94478c37955681885b", size = 193134 }, + { url = "https://files.pythonhosted.org/packages/36/22/5f9760b135bdefb8cf8d663890756136754db03214f929b73185dfa33f05/levenshtein-0.26.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e2517e8d3c221de2d1183f400aed64211fcfc77077b291ed9f3bb64f141cdc", size = 162266 }, + { url = "https://files.pythonhosted.org/packages/11/50/6b1a5f3600caae40db0928f6775d7efc62c13dec2407d3d540bc4afdb72c/levenshtein-0.26.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9092b622765c7649dd1d8af0f43354723dd6f4e570ac079ffd90b41033957438", size = 246339 }, + { url = "https://files.pythonhosted.org/packages/26/eb/ede282fcb495570898b39a0d2f21bbc9be5587d604c93a518ece80f3e7dc/levenshtein-0.26.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc16796c85d7d8b259881d59cc8b5e22e940901928c2ff6924b2c967924e8a0b", size = 1077937 }, + { url = "https://files.pythonhosted.org/packages/35/41/eebe1c4a75f592d9bdc3c2595418f083bcad747e0aec52a1a9ffaae93f5c/levenshtein-0.26.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4370733967f5994ceeed8dc211089bedd45832ee688cecea17bfd35a9eb22b9", size = 1330607 }, + { url = "https://files.pythonhosted.org/packages/12/8e/4d34b1857adfd69c2a72d84bca1b8538d4cfaaf6fddd8599573f4281a9d1/levenshtein-0.26.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3535ecfd88c9b283976b5bc61265855f59bba361881e92ed2b5367b6990c93fe", size = 1197505 }, + { url = "https://files.pythonhosted.org/packages/c0/7b/6afcda1b0a0622cedaa4f7a5b3507c2384a7358fc051ccf619e5d2453bf2/levenshtein-0.26.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:90236e93d98bdfd708883a6767826fafd976dac8af8fc4a0fb423d4fa08e1bf0", size = 1352832 }, + { url = "https://files.pythonhosted.org/packages/21/5e/0ed4e7b5c820b6bc40e2c391633292c3666400339042a3d306f0dc8fdcb4/levenshtein-0.26.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:04b7cabb82edf566b1579b3ed60aac0eec116655af75a3c551fee8754ffce2ea", size = 1135970 }, + { url = 
"https://files.pythonhosted.org/packages/c9/91/3ff1abacb58642749dfd130ad855370e01b9c7aeaa73801964361f6e355f/levenshtein-0.26.1-cp312-cp312-win32.whl", hash = "sha256:ae382af8c76f6d2a040c0d9ca978baf461702ceb3f79a0a3f6da8d596a484c5b", size = 87599 }, + { url = "https://files.pythonhosted.org/packages/7d/f9/727f3ba7843a3fb2a0f3db825358beea2a52bc96258874ee80cb2e5ecabb/levenshtein-0.26.1-cp312-cp312-win_amd64.whl", hash = "sha256:fd091209798cfdce53746f5769987b4108fe941c54fb2e058c016ffc47872918", size = 98809 }, + { url = "https://files.pythonhosted.org/packages/d4/f4/f87f19222d279dbac429b9bc7ccae271d900fd9c48a581b8bc180ba6cd09/levenshtein-0.26.1-cp312-cp312-win_arm64.whl", hash = "sha256:7e82f2ea44a81ad6b30d92a110e04cd3c8c7c6034b629aca30a3067fa174ae89", size = 88227 }, + { url = "https://files.pythonhosted.org/packages/7e/d6/b4b522b94d7b387c023d22944590befc0ac6b766ac6d197afd879ddd77fc/levenshtein-0.26.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:790374a9f5d2cbdb30ee780403a62e59bef51453ac020668c1564d1e43438f0e", size = 175836 }, + { url = "https://files.pythonhosted.org/packages/25/76/06d1e26a8e6d0de68ef4a157dd57f6b342413c03550309e4aa095a453b28/levenshtein-0.26.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7b05c0415c386d00efda83d48db9db68edd02878d6dbc6df01194f12062be1bb", size = 157036 }, + { url = "https://files.pythonhosted.org/packages/7e/23/21209a9e96b878aede3bea104533866762ba621e36fc344aa080db5feb02/levenshtein-0.26.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3114586032361722ddededf28401ce5baf1cf617f9f49fb86b8766a45a423ff", size = 153326 }, + { url = "https://files.pythonhosted.org/packages/06/38/9fc68685fffd8863b13864552eba8f3eb6a82a4dc558bf2c6553c2347d6c/levenshtein-0.26.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2532f8a13b68bf09f152d906f118a88da2063da22f44c90e904b142b0a53d534", size = 183693 }, + { url = "https://files.pythonhosted.org/packages/f6/82/ccd7bdd7d431329da025e649c63b731df44f8cf31b957e269ae1c1dc9a8e/levenshtein-0.26.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:219c30be6aa734bf927188d1208b7d78d202a3eb017b1c5f01ab2034d2d4ccca", size = 190581 }, + { url = "https://files.pythonhosted.org/packages/6e/c5/57f90b4aea1f89f853872b27a5a5dbce37b89ffeae42c02060b3e82038b2/levenshtein-0.26.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397e245e77f87836308bd56305bba630010cd8298c34c4c44bd94990cdb3b7b1", size = 162446 }, + { url = "https://files.pythonhosted.org/packages/fc/da/df6acca738921f896ce2d178821be866b43a583f85e2d1de63a4f8f78080/levenshtein-0.26.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aeff6ea3576f72e26901544c6c55c72a7b79b9983b6f913cba0e9edbf2f87a97", size = 247123 }, + { url = "https://files.pythonhosted.org/packages/22/fb/f44a4c0d7784ccd32e4166714fea61e50f62b232162ae16332f45cb55ab2/levenshtein-0.26.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a19862e3539a697df722a08793994e334cd12791e8144851e8a1dee95a17ff63", size = 1077437 }, + { url = "https://files.pythonhosted.org/packages/f0/5e/d9b9e7daa13cc7e2184a3c2422bb847f05d354ce15ba113b20d83e9ab366/levenshtein-0.26.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:dc3b5a64f57c3c078d58b1e447f7d68cad7ae1b23abe689215d03fc434f8f176", size = 1330362 }, + { url = 
"https://files.pythonhosted.org/packages/bf/67/480d85bb516798014a6849be0225b246f35df4b54499c348c9c9e311f936/levenshtein-0.26.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bb6c7347424a91317c5e1b68041677e4c8ed3e7823b5bbaedb95bffb3c3497ea", size = 1198721 }, + { url = "https://files.pythonhosted.org/packages/9a/7d/889ff7d86903b6545665655627113d263c88c6d596c68fb09a640ee4f0a7/levenshtein-0.26.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b817376de4195a207cc0e4ca37754c0e1e1078c2a2d35a6ae502afde87212f9e", size = 1351820 }, + { url = "https://files.pythonhosted.org/packages/b9/29/cd42273150f08c200ed2d1879486d73502ee35265f162a77952f101d93a0/levenshtein-0.26.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7b50c3620ff47c9887debbb4c154aaaac3e46be7fc2e5789ee8dbe128bce6a17", size = 1135747 }, + { url = "https://files.pythonhosted.org/packages/1d/90/cbcfa3dd86023e82036662a19fec2fcb48782d3f9fa322d44dc898d95a5d/levenshtein-0.26.1-cp313-cp313-win32.whl", hash = "sha256:9fb859da90262eb474c190b3ca1e61dee83add022c676520f5c05fdd60df902a", size = 87318 }, + { url = "https://files.pythonhosted.org/packages/83/73/372edebc79fd09a8b2382cf1244d279ada5b795124f1e1c4fc73d9fbb00f/levenshtein-0.26.1-cp313-cp313-win_amd64.whl", hash = "sha256:8adcc90e3a5bfb0a463581d85e599d950fe3c2938ac6247b29388b64997f6e2d", size = 98418 }, + { url = "https://files.pythonhosted.org/packages/b2/6d/f0160ea5a7bb7a62b3b3d56e9fc5024b440cb59555a90be2347abf2e7888/levenshtein-0.26.1-cp313-cp313-win_arm64.whl", hash = "sha256:c2599407e029865dc66d210b8804c7768cbdbf60f061d993bb488d5242b0b73e", size = 87792 }, + { url = "https://files.pythonhosted.org/packages/c9/40/11a601baf1731d6b6927890bb7107f6cf77357dec8a22f269cd8f4ab8631/levenshtein-0.26.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6cf8f1efaf90ca585640c5d418c30b7d66d9ac215cee114593957161f63acde0", size = 172550 }, + { url = "https://files.pythonhosted.org/packages/74/1c/070757904b9fb4dfddaf9f43da8e8d9fb6feabd660631cc9e4cb49364d2b/levenshtein-0.26.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d5b2953978b8c158dd5cd93af8216a5cfddbf9de66cf5481c2955f44bb20767a", size = 154546 }, + { url = "https://files.pythonhosted.org/packages/31/7e/ef5538895aa96d6f59b5a6ed3c40c3db3b1b0df45807bd23eae250f380b8/levenshtein-0.26.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b952b3732c4631c49917d4b15d78cb4a2aa006c1d5c12e2a23ba8e18a307a055", size = 152897 }, + { url = "https://files.pythonhosted.org/packages/94/65/28fb5c59871a673f93e72c00c33c43bcc27eff6f9be5e515252e6da28a7f/levenshtein-0.26.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07227281e12071168e6ae59238918a56d2a0682e529f747b5431664f302c0b42", size = 160411 }, + { url = "https://files.pythonhosted.org/packages/4c/c7/b8fe968f92ed672cd346d38f4077586eb7ff63bade2e8d7c93a9259573c4/levenshtein-0.26.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8191241cd8934feaf4d05d0cc0e5e72877cbb17c53bbf8c92af9f1aedaa247e9", size = 247483 }, + { url = "https://files.pythonhosted.org/packages/f3/98/c119974fdce4808afdf3622230759c871bc4c73287cf34b338db2be936b8/levenshtein-0.26.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9e70d7ee157a9b698c73014f6e2b160830e7d2d64d2e342fefc3079af3c356fc", size = 95854 }, +] + [[package]] name = "lionagi" -version = "0.8.6" +version = "0.8.7" source = { editable = "." 
} dependencies = [ { name = "aiocache" }, { name = "aiohttp" }, { name = "jinja2" }, - { name = "litellm" }, { name = "pandas" }, { name = "pillow" }, { name = "pydantic" }, { name = "python-dotenv" }, ] -[package.dev-dependencies] +[package.optional-dependencies] dev = [ { name = "black", extra = ["jupyter"] }, { name = "furo" }, @@ -1251,39 +1427,50 @@ dev = [ ] tools = [ { name = "docling" }, + { name = "e2b-code-interpreter" }, + { name = "fuzzywuzzy" }, + { name = "gitpython" }, + { name = "litellm" }, + { name = "pygithub" }, + { name = "pytest-mock" }, + { name = "python-levenshtein" }, + { name = "rich" }, ] [package.metadata] requires-dist = [ { name = "aiocache", specifier = ">=0.12.0" }, { name = "aiohttp", specifier = ">=3.11.0" }, + { name = "black", extras = ["jupyter"], marker = "extra == 'dev'", specifier = ">=24.10.0" }, + { name = "docling", marker = "extra == 'tools'", specifier = ">=2.15.1" }, + { name = "e2b-code-interpreter", marker = "extra == 'tools'", specifier = ">=0.1.0" }, + { name = "furo", marker = "extra == 'dev'", specifier = ">=2024.8.6" }, + { name = "fuzzywuzzy", marker = "extra == 'tools'", specifier = ">=0.18.0" }, + { name = "gitpython", marker = "extra == 'tools'", specifier = ">=3.1.44" }, + { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.29.5" }, + { name = "ipython", marker = "extra == 'dev'", specifier = ">=8.31.0" }, + { name = "ipywidgets", marker = "extra == 'dev'", specifier = ">=8.1.5" }, + { name = "isort", marker = "extra == 'dev'", specifier = ">=5.13.2" }, { name = "jinja2", specifier = ">=3.1.0" }, - { name = "litellm", specifier = ">=1.55.3" }, + { name = "litellm", marker = "extra == 'tools'", specifier = ">=1.55.3" }, { name = "pandas", specifier = ">=2.0.0" }, { name = "pillow", specifier = ">=10.0.0" }, + { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.1" }, { name = "pydantic", specifier = ">=2.0.0" }, + { name = "pygithub", marker = "extra == 'tools'", specifier = ">=2.2.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.4" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.25.0" }, + { name = "pytest-mock", marker = "extra == 'tools'", specifier = ">=3.12.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "python-levenshtein", marker = "extra == 'tools'", specifier = ">=0.25.0" }, + { name = "rich", marker = "extra == 'tools'", specifier = ">=13.7.0" }, + { name = "sphinx", marker = "extra == 'dev'", specifier = ">=8.1.3" }, + { name = "sphinx-autobuild", marker = "extra == 'dev'", specifier = ">=2024.10.3" }, ] -[package.metadata.requires-dev] -dev = [ - { name = "black", extras = ["jupyter"], specifier = ">=24.10.0" }, - { name = "furo", specifier = ">=2024.8.6" }, - { name = "ipykernel", specifier = ">=6.29.5" }, - { name = "ipython", specifier = ">=8.31.0" }, - { name = "ipywidgets", specifier = ">=8.1.5" }, - { name = "isort", specifier = ">=5.13.2" }, - { name = "pre-commit", specifier = ">=4.0.1" }, - { name = "pytest", specifier = ">=8.3.4" }, - { name = "pytest-asyncio", specifier = ">=0.25.0" }, - { name = "sphinx", specifier = ">=8.1.3" }, - { name = "sphinx-autobuild", specifier = ">=2024.10.3" }, -] -tools = [{ name = "docling", specifier = ">=2.15.1" }] - [[package]] name = "litellm" -version = "1.59.0" +version = "1.59.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1298,9 +1485,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/78/0c/d6a59dad534f4f0636a0113c8181fee1062f26b79f09b43fd2e663ae642e/litellm-1.59.0.tar.gz", hash = "sha256:140eecb47952558414d00f7a259fe303fe5f0d073973a28f488fc6938cc45660", size = 6362311 } +sdist = { url = "https://files.pythonhosted.org/packages/d1/e6/5e45636e1eb6833f5048ff4ec5b8d3a899ee129dd0b6aa728cf0349f3d9b/litellm-1.59.3.tar.gz", hash = "sha256:5e625e07a97346704d67f04265253acfc6a0c7e3afad82c2ed57fcdaae066add", size = 6378806 } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/0b/9414df775c452829865ca93cce6a15d37ed47a089192e705a44b621e8166/litellm-1.59.0-py3-none-any.whl", hash = "sha256:b0c8bdee556d5dc2f9c703f7dc831574ea2e339d2e762dd626d014c170b8b587", size = 6653516 }, + { url = "https://files.pythonhosted.org/packages/d3/d8/f8c64a54e43ea26604e88f1ec7b80bfa56a9a71599232170cede2d0ea7d9/litellm-1.59.3-py3-none-any.whl", hash = "sha256:17c6fe3b9ccd54fbdb73d2389268864296f9601c5cb29d80061489e0a984e5c0", size = 6666184 }, ] [[package]] @@ -1849,7 +2036,7 @@ wheels = [ [[package]] name = "openai" -version = "1.59.9" +version = "1.60.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1861,9 +2048,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/2d/04faa92bac0341649223398503db4415d2f658a757d9d32bb68f3378ddd0/openai-1.59.9.tar.gz", hash = "sha256:ec1a20b0351b4c3e65c6292db71d8233515437c6065efd4fd50edeb55df5f5d2", size = 347134 } +sdist = { url = "https://files.pythonhosted.org/packages/d4/2d/9bdf4435d7669b4d027d6d69b4ac82f6be76153d9e90d3155d4224626a29/openai-1.60.0.tar.gz", hash = "sha256:7fa536cd4b644718645b874d2706e36dbbef38b327e42ca0623275da347ee1a9", size = 347844 } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/b4/57f1954a4560092ad8c45f07ad183eab9c8e093e0a1db829f9b506b2d5d1/openai-1.59.9-py3-none-any.whl", hash = "sha256:61a0608a1313c08ddf92fe793b6dbd1630675a1fe3866b2f96447ce30050c448", size = 455527 }, + { url = "https://files.pythonhosted.org/packages/c0/53/782008d94f5f3141795e65bd7f87afaebb97e7516342299c1b1a08d5aaf8/openai-1.60.0-py3-none-any.whl", hash = "sha256:df06c43be8018274980ac363da07d4b417bd835ead1c66e14396f6f15a0d5dda", size = 456109 }, ] [[package]] @@ -2160,6 +2347,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/b6/c5319caea262f4821995dca2107483b94a3345d4607ad797c76cb9c36bcc/propcache-0.2.1-py3-none-any.whl", hash = "sha256:52277518d6aae65536e9cea52d4e7fd2f7a66f4aa2d30ed3f2fcea620ace3c54", size = 11818 }, ] +[[package]] +name = "protobuf" +version = "5.29.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/d1/e0a911544ca9993e0f17ce6d3cc0932752356c1b0a834397f28e63479344/protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620", size = 424945 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/7a/1e38f3cafa022f477ca0f57a1f49962f21ad25850c3ca0acd3b9d0091518/protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888", size = 422708 }, + { url = "https://files.pythonhosted.org/packages/61/fa/aae8e10512b83de633f2646506a6d835b151edf4b30d18d73afd01447253/protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a", size = 434508 }, + { url = 
"https://files.pythonhosted.org/packages/dd/04/3eaedc2ba17a088961d0e3bd396eac764450f431621b58a04ce898acd126/protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e", size = 417825 }, + { url = "https://files.pythonhosted.org/packages/4f/06/7c467744d23c3979ce250397e26d8ad8eeb2bea7b18ca12ad58313c1b8d5/protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84", size = 319573 }, + { url = "https://files.pythonhosted.org/packages/a8/45/2ebbde52ad2be18d3675b6bee50e68cd73c9e0654de77d595540b5129df8/protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f", size = 319672 }, + { url = "https://files.pythonhosted.org/packages/fd/b2/ab07b09e0f6d143dfb839693aa05765257bceaa13d03bf1a696b78323e7a/protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f", size = 172550 }, +] + [[package]] name = "psutil" version = "6.1.1" @@ -2337,12 +2538,20 @@ wheels = [ ] [[package]] -name = "pyflakes" -version = "3.2.0" +name = "pygithub" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/f9/669d8c9c86613c9d568757c7f5824bd3197d7b1c6c27553bc5618a27cce2/pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f", size = 63788 } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/d7/f1b7db88d8e4417c5d47adad627a93547f44bdc9028372dbd2313f34a855/pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a", size = 62725 }, + { url = "https://files.pythonhosted.org/packages/37/05/bfbdbbc5d8aafd8dae9b3b6877edca561fccd8528ef5edc4e7b6d23721b5/PyGithub-2.5.0-py3-none-any.whl", hash = "sha256:b0b635999a658ab8e08720bdd3318893ff20e2275f6446fcf35bf3f44f2c0fd2", size = 375935 }, ] [[package]] @@ -2354,6 +2563,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + [[package]] name = "pypdfium2" version = "4.30.1" @@ -2403,6 +2646,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/d8/defa05ae50dcd6019a95527200d3b3980043df5aa445d40cb0ef9f7f98ab/pytest_asyncio-0.25.2-py3-none-any.whl", hash = "sha256:0d0bb693f7b99da304a0634afc0a4b19e49d5e0de2d670f38dc4bfa5727c5075", size = 19400 }, ] +[[package]] +name = "pytest-mock" +version = "3.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/90/a955c3ab35ccd41ad4de556596fa86685bf4fc5ffcc62d22d856cfd4e29a/pytest-mock-3.14.0.tar.gz", hash = 
"sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0", size = 32814 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 }, +] + [[package]] name = "python-bidi" version = "0.6.3" @@ -2513,6 +2768,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, ] +[[package]] +name = "python-levenshtein" +version = "0.26.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "levenshtein" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/72/58d77cb80b3c130d94f53a8204ffad9acfddb925b2fb5818ff9af0b3c832/python_levenshtein-0.26.1.tar.gz", hash = "sha256:24ba578e28058ebb4afa2700057e1678d7adf27e43cd1f17700c09a9009d5d3a", size = 12276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/d7/03e0453719ed89724664f781f0255949408118093dbf77a2aa2a1198b38e/python_Levenshtein-0.26.1-py3-none-any.whl", hash = "sha256:8ef5e529dd640fb00f05ee62d998d2ee862f19566b641ace775d5ae16167b2ef", size = 9426 }, +] + [[package]] name = "python-pptx" version = "1.0.2" @@ -2673,6 +2940,80 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 }, ] +[[package]] +name = "rapidfuzz" +version = "3.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/aa/25e5a20131369d82c7b8288c99c2c3011ec47a3f5953ccc9cb8145720be5/rapidfuzz-3.11.0.tar.gz", hash = "sha256:a53ca4d3f52f00b393fab9b5913c5bafb9afc27d030c8a1db1283da6917a860f", size = 57983000 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/70/820ebf9eb22ad97b9e0bb9fd1ad8c6be4c8db5a0974d12ce27b5c9a30db0/rapidfuzz-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb8a54543d16ab1b69e2c5ed96cabbff16db044a50eddfc028000138ca9ddf33", size = 1954240 }, + { url = "https://files.pythonhosted.org/packages/41/bc/e39abdc28160d8147ccab0aa922a29be50529dcf149615a68a324ff6f9b1/rapidfuzz-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:231c8b2efbd7f8d2ecd1ae900363ba168b8870644bb8f2b5aa96e4a7573bde19", size = 1427139 }, + { url = "https://files.pythonhosted.org/packages/b6/2d/19b8e5d80257b13d73ba994552b78a69ac2ed70f1de716f1b02fcb84d09c/rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54e7f442fb9cca81e9df32333fb075ef729052bcabe05b0afc0441f462299114", size = 1419602 }, + { url = "https://files.pythonhosted.org/packages/8c/82/1fc80cc531ec712872025c19118d78eb23aff09c7144b380c2c4b544b0d1/rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:906f1f2a1b91c06599b3dd1be207449c5d4fc7bd1e1fa2f6aef161ea6223f165", size = 5635370 }, + { url = "https://files.pythonhosted.org/packages/3c/5c/007b90af25f98e301b5f7a095059b09f602701443d555724c9226a45514c/rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed59044aea9eb6c663112170f2399b040d5d7b162828b141f2673e822093fa8", size = 1680848 }, + { url = 
"https://files.pythonhosted.org/packages/01/04/e481530eff5d1cf337b86a3095dd4de0b758c37291e51eb0d8c4f7d49719/rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cb1965a28b0fa64abdee130c788a0bc0bb3cf9ef7e3a70bf055c086c14a3d7e", size = 1682131 }, + { url = "https://files.pythonhosted.org/packages/10/15/b0ec18edfe6146d8915679644ab7584cd0165724d6a53bcc43bd59f8edb5/rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b488b244931d0291412917e6e46ee9f6a14376625e150056fe7c4426ef28225", size = 3134097 }, + { url = "https://files.pythonhosted.org/packages/8b/0e/cf0a5d62977381bca981fc171fd6c85dc52ca1239eaacf9c1d38978c5866/rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f0ba13557fec9d5ffc0a22826754a7457cc77f1b25145be10b7bb1d143ce84c6", size = 2332928 }, + { url = "https://files.pythonhosted.org/packages/dc/71/568d383eb36586c9e7e13f1327203e2be0938e5ff070c1fa2a99b418808e/rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3871fa7dfcef00bad3c7e8ae8d8fd58089bad6fb21f608d2bf42832267ca9663", size = 6940409 }, + { url = "https://files.pythonhosted.org/packages/ba/23/02972657d69e6d3aae2cdbd67debad080410ff9ef8849d8eab5e580a48a5/rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b2669eafee38c5884a6e7cc9769d25c19428549dcdf57de8541cf9e82822e7db", size = 2715928 }, + { url = "https://files.pythonhosted.org/packages/17/17/d964d770faa4e25e125618c00e31607cf8ce639d518fc29d200edf06cfda/rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ffa1bb0e26297b0f22881b219ffc82a33a3c84ce6174a9d69406239b14575bd5", size = 3265078 }, + { url = "https://files.pythonhosted.org/packages/bc/13/a117412b1e4ed0bb23b9891a45a59812d96fde8c076b8b8b828aa7ca3710/rapidfuzz-3.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:45b15b8a118856ac9caac6877f70f38b8a0d310475d50bc814698659eabc1cdb", size = 4169215 }, + { url = "https://files.pythonhosted.org/packages/9f/0d/89ef496aedf885db4bfe7f46ac6727666afe0d9d8ca5b4f9c7cc8eef0378/rapidfuzz-3.11.0-cp310-cp310-win32.whl", hash = "sha256:22033677982b9c4c49676f215b794b0404073f8974f98739cb7234e4a9ade9ad", size = 1841736 }, + { url = "https://files.pythonhosted.org/packages/47/9a/69019f4e9c8a42e4aca0169dbae71602aba4e0fa4c5e84515f3ed682e59a/rapidfuzz-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:be15496e7244361ff0efcd86e52559bacda9cd975eccf19426a0025f9547c792", size = 1614955 }, + { url = "https://files.pythonhosted.org/packages/37/65/6fb036e39d175299ce44e5186ee2d08b9ea02d732ed6dbd70280f63b4eba/rapidfuzz-3.11.0-cp310-cp310-win_arm64.whl", hash = "sha256:714a7ba31ba46b64d30fccfe95f8013ea41a2e6237ba11a805a27cdd3bce2573", size = 863543 }, + { url = "https://files.pythonhosted.org/packages/40/ac/9ca008834104ad138fbfe2d7ae4443ada55e00c4eb4272d288897e8763b8/rapidfuzz-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8724a978f8af7059c5323d523870bf272a097478e1471295511cf58b2642ff83", size = 1955019 }, + { url = "https://files.pythonhosted.org/packages/4c/55/d026c01c9312c9c2a413679052a9bb884743fc5655e59339116d83a2125b/rapidfuzz-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b63cb1f2eb371ef20fb155e95efd96e060147bdd4ab9fc400c97325dfee9fe1", size = 1427753 }, + { url = "https://files.pythonhosted.org/packages/d1/a0/5f3fae81dd1efdf47da19641e321ae84b4f49a5a7b2ab3ff78bd04a0ae7f/rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:82497f244aac10b20710448645f347d862364cc4f7d8b9ba14bd66b5ce4dec18", size = 1411472 }, + { url = "https://files.pythonhosted.org/packages/3c/3f/770b0fca00faf42983fe21fbd379f429dc2600c58d7015f969fb1f73c1db/rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:339607394941801e6e3f6c1ecd413a36e18454e7136ed1161388de674f47f9d9", size = 5614973 }, + { url = "https://files.pythonhosted.org/packages/08/6f/e3df1c41adf27f4b8a95c9de947ed49e7311a676cd05bdd62a17bb1f21ec/rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84819390a36d6166cec706b9d8f0941f115f700b7faecab5a7e22fc367408bc3", size = 1665667 }, + { url = "https://files.pythonhosted.org/packages/1a/9b/6c91b98dc70270c35913f359c17e30d4185c83663c4721363540f4c03016/rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eea8d9e20632d68f653455265b18c35f90965e26f30d4d92f831899d6682149b", size = 1676166 }, + { url = "https://files.pythonhosted.org/packages/59/9d/eec7a1bfd3566fb17617b41bfb19556c483241d6864eea3c01b88efe5459/rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b659e1e2ea2784a9a397075a7fc395bfa4fe66424042161c4bcaf6e4f637b38", size = 3130890 }, + { url = "https://files.pythonhosted.org/packages/26/7c/0a4bb5fbb03a362ea3e1409515d3ae641d9bc869c1375d97d8c47e369cc0/rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1315cd2a351144572e31fe3df68340d4b83ddec0af8b2e207cd32930c6acd037", size = 2339850 }, + { url = "https://files.pythonhosted.org/packages/f8/c1/6b839db83caaa47721398b76390a3145202beb108fa433e842879b497439/rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a7743cca45b4684c54407e8638f6d07b910d8d811347b9d42ff21262c7c23245", size = 6941921 }, + { url = "https://files.pythonhosted.org/packages/cc/c9/eaac43bb5e44f3594afddbbdb1a28d7bc0bcb69f93ed9a2ef0c949a48fb2/rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5bb636b0150daa6d3331b738f7c0f8b25eadc47f04a40e5c23c4bfb4c4e20ae3", size = 2717551 }, + { url = "https://files.pythonhosted.org/packages/ef/d3/06ca5ee6b7f030f6527ea1e80fe9a4ab3597e86bc783574e3fc2b05a5265/rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:42f4dd264ada7a9aa0805ea0da776dc063533917773cf2df5217f14eb4429eae", size = 3259550 }, + { url = "https://files.pythonhosted.org/packages/74/d8/094e75ee0424cce329901a0ff98c1821fd5d9dbc11bcdc9a3fddd2a09c4c/rapidfuzz-3.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51f24cb39e64256221e6952f22545b8ce21cacd59c0d3e367225da8fc4b868d8", size = 4173546 }, + { url = "https://files.pythonhosted.org/packages/d7/81/f263059e3d9f11b076751ac7ef4eba303fa7f11e32155658953f1697c274/rapidfuzz-3.11.0-cp311-cp311-win32.whl", hash = "sha256:aaf391fb6715866bc14681c76dc0308f46877f7c06f61d62cc993b79fc3c4a2a", size = 1842172 }, + { url = "https://files.pythonhosted.org/packages/33/04/dc42c787f02505a4ca0a961172e8353ceee74ea378b795f3e49686e944b7/rapidfuzz-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:ebadd5b8624d8ad503e505a99b8eb26fe3ea9f8e9c2234e805a27b269e585842", size = 1621122 }, + { url = "https://files.pythonhosted.org/packages/4e/0f/461e709bd641922a32bc034976963acbb11d8cf0af28b526f3f35ae07975/rapidfuzz-3.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:d895998fec712544c13cfe833890e0226585cf0391dd3948412441d5d68a2b8c", size = 864792 }, + { url = 
"https://files.pythonhosted.org/packages/c5/54/954ae2dc7dcb53f5f0953379a4a175d9c2f5e393656ab042843e53780d32/rapidfuzz-3.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f382fec4a7891d66fb7163c90754454030bb9200a13f82ee7860b6359f3f2fa8", size = 1938694 }, + { url = "https://files.pythonhosted.org/packages/f9/74/4682d3370821db5374c0f192d1e4123598190cb53d88936016187f80f154/rapidfuzz-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dfaefe08af2a928e72344c800dcbaf6508e86a4ed481e28355e8d4b6a6a5230e", size = 1423836 }, + { url = "https://files.pythonhosted.org/packages/e7/78/ce3d72767e186a9deca30dccb5096cfb03ec49e8e3abf2836ab10d1b4f74/rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92ebb7c12f682b5906ed98429f48a3dd80dd0f9721de30c97a01473d1a346576", size = 1393199 }, + { url = "https://files.pythonhosted.org/packages/3c/21/26bdbe846726ff7793789da07e155699cafa3ba3ed3bee86d472b4762121/rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a1b3ebc62d4bcdfdeba110944a25ab40916d5383c5e57e7c4a8dc0b6c17211a", size = 5543400 }, + { url = "https://files.pythonhosted.org/packages/c9/d5/78e922cfbfc67011ecee9f6c2fd630dee68650d23b9ce78316386a3d8c88/rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c6d7fea39cb33e71de86397d38bf7ff1a6273e40367f31d05761662ffda49e4", size = 1642855 }, + { url = "https://files.pythonhosted.org/packages/df/bb/dcf084c03c46968c3fbc52a33f2a725e0b8bb54ed714f0866c7dad747358/rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99aebef8268f2bc0b445b5640fd3312e080bd17efd3fbae4486b20ac00466308", size = 1669853 }, + { url = "https://files.pythonhosted.org/packages/ec/3a/9aa7a2c5b611e8d465e82c1d5f8278be7335769165f68f3ffc5a169f4a23/rapidfuzz-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4469307f464ae3089acf3210b8fc279110d26d10f79e576f385a98f4429f7d97", size = 3129941 }, + { url = "https://files.pythonhosted.org/packages/d3/15/2bbab50a2634b25593e36241ab9629be253b8c6ea28a34ba6b856bfea661/rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:eb97c53112b593f89a90b4f6218635a9d1eea1d7f9521a3b7d24864228bbc0aa", size = 2302199 }, + { url = "https://files.pythonhosted.org/packages/c6/7c/e3ed92b89c657348c41708fe3b856ebc982c4b220b47299bdef8da374b20/rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ef8937dae823b889c0273dfa0f0f6c46a3658ac0d851349c464d1b00e7ff4252", size = 6904702 }, + { url = "https://files.pythonhosted.org/packages/bd/4f/eed77097068bffb692d6389ae19a531c52a896275e9f5c00566207767537/rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d95f9e9f3777b96241d8a00d6377cc9c716981d828b5091082d0fe3a2924b43e", size = 2679287 }, + { url = "https://files.pythonhosted.org/packages/1f/dc/d2d5dcd5b33a5b394485c67aa13674c8345826af8d3ba0702c06ab2f6430/rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:b1d67d67f89e4e013a5295e7523bc34a7a96f2dba5dd812c7c8cb65d113cbf28", size = 3224946 }, + { url = "https://files.pythonhosted.org/packages/8f/af/17c0c29ded64e464e626dd43fc2e3028c1fa929d10e8201fb2aec654e5b3/rapidfuzz-3.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d994cf27e2f874069884d9bddf0864f9b90ad201fcc9cb2f5b82bacc17c8d5f2", size = 4144678 }, + { url = "https://files.pythonhosted.org/packages/66/5d/5dc02c87d9a0e64e0abd728d3255ddce8475e06b6be3f732a460f0a360c9/rapidfuzz-3.11.0-cp312-cp312-win32.whl", hash = 
"sha256:ba26d87fe7fcb56c4a53b549a9e0e9143f6b0df56d35fe6ad800c902447acd5b", size = 1824882 }, + { url = "https://files.pythonhosted.org/packages/b7/da/a37d532cbefd7242191abf18f438b315bf5c72d742f78414a8ec1b7396cf/rapidfuzz-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b1f7efdd7b7adb32102c2fa481ad6f11923e2deb191f651274be559d56fc913b", size = 1606419 }, + { url = "https://files.pythonhosted.org/packages/92/d0/1406d6e110aff87303e98f47adc5e76ef2e69d51cdd08b2d463520158cab/rapidfuzz-3.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:ed78c8e94f57b44292c1a0350f580e18d3a3c5c0800e253f1583580c1b417ad2", size = 858655 }, + { url = "https://files.pythonhosted.org/packages/8a/30/984f1013d28b88304386c8e70b5d63db4765c28be8d9ef68d177c9addc77/rapidfuzz-3.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e60814edd0c9b511b5f377d48b9782b88cfe8be07a98f99973669299c8bb318a", size = 1931354 }, + { url = "https://files.pythonhosted.org/packages/a4/8a/41d4f95c5742a8a47c0e96c02957f72f8c34411cecde87fe371d5e09807e/rapidfuzz-3.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f28952da055dbfe75828891cd3c9abf0984edc8640573c18b48c14c68ca5e06", size = 1417918 }, + { url = "https://files.pythonhosted.org/packages/e3/26/031ac8366831da6afc5f25462196eab0e0caf9422c83c007307e23a6f010/rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e8f93bc736020351a6f8e71666e1f486bb8bd5ce8112c443a30c77bfde0eb68", size = 1388327 }, + { url = "https://files.pythonhosted.org/packages/17/1b/927edcd3b540770d3d6d52fe079c6bffdb99e9dfa4b73585bee2a8bd6504/rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76a4a11ba8f678c9e5876a7d465ab86def047a4fcc043617578368755d63a1bc", size = 5513214 }, + { url = "https://files.pythonhosted.org/packages/0d/a2/c1e4f35e7bfbbd97a665f8cd119d8bd4a085f1721366cd76582dc022131b/rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc0e0d41ad8a056a9886bac91ff9d9978e54a244deb61c2972cc76b66752de9c", size = 1638560 }, + { url = "https://files.pythonhosted.org/packages/39/3f/6827972efddb1e357a0b6165ae9e310d7dc5c078af3023893365c212641b/rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e8ea35f2419c7d56b3e75fbde2698766daedb374f20eea28ac9b1f668ef4f74", size = 1667185 }, + { url = "https://files.pythonhosted.org/packages/cc/5d/6902b93e1273e69ea087afd16e7504099bcb8d712a9f69cb649ea05ca7e1/rapidfuzz-3.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd340bbd025302276b5aa221dccfe43040c7babfc32f107c36ad783f2ffd8775", size = 3107466 }, + { url = "https://files.pythonhosted.org/packages/a6/02/bdb2048c9b8edf4cd82c2e8f6a8ed9af0fbdf91810ca2b36d1be6fc996d8/rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:494eef2c68305ab75139034ea25328a04a548d297712d9cf887bf27c158c388b", size = 2302041 }, + { url = "https://files.pythonhosted.org/packages/12/91/0bbe51e3c15c02578487fd10a14692a40677ea974098d8d376bafd627a89/rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5a167344c1d6db06915fb0225592afdc24d8bafaaf02de07d4788ddd37f4bc2f", size = 6899969 }, + { url = "https://files.pythonhosted.org/packages/27/9d/09b85adfd5829f60bd6dbe53ba66dad22f93a281d494a5638b5f20fb6a8a/rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c7af25bda96ac799378ac8aba54a8ece732835c7b74cfc201b688a87ed11152", size = 2669022 }, + { url = 
"https://files.pythonhosted.org/packages/cb/07/6fb723963243335c3bf73925914b6998649d642eff550187454d5bb3d077/rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d2a0f7e17f33e7890257367a1662b05fecaf56625f7dbb6446227aaa2b86448b", size = 3229475 }, + { url = "https://files.pythonhosted.org/packages/3a/8e/e9af6da2e235aa29ad2bb0a1fc2472b2949ed8d9ff8fb0f05b4bfbbf7675/rapidfuzz-3.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d0d26c7172bdb64f86ee0765c5b26ea1dc45c52389175888ec073b9b28f4305", size = 4143861 }, + { url = "https://files.pythonhosted.org/packages/fd/d8/4677e36e958b4d95d039d254d597db9c020896c8130911dc36b136373b87/rapidfuzz-3.11.0-cp313-cp313-win32.whl", hash = "sha256:6ad02bab756751c90fa27f3069d7b12146613061341459abf55f8190d899649f", size = 1822624 }, + { url = "https://files.pythonhosted.org/packages/e8/97/1c782140e688ea2c3337d94516c635c575aa39fe62782fd53ad5d2119df4/rapidfuzz-3.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:b1472986fd9c5d318399a01a0881f4a0bf4950264131bb8e2deba9df6d8c362b", size = 1604273 }, + { url = "https://files.pythonhosted.org/packages/a6/83/8b713d50bec947e945a79be47f772484307fc876c426fb26c6f369098389/rapidfuzz-3.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:c408f09649cbff8da76f8d3ad878b64ba7f7abdad1471efb293d2c075e80c822", size = 857385 }, + { url = "https://files.pythonhosted.org/packages/30/5a/8ac67667663d24cc4d4b76f63783e58ef03e4d4843d02dab6b2f8470ea5e/rapidfuzz-3.11.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:f06e3c4c0a8badfc4910b9fd15beb1ad8f3b8fafa8ea82c023e5e607b66a78e4", size = 1853100 }, + { url = "https://files.pythonhosted.org/packages/dc/72/b043c26e93fb1bc5dfab1e5dd0f8d2f6135c2aa48e6db0660d4ecc5b157a/rapidfuzz-3.11.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fe7aaf5a54821d340d21412f7f6e6272a9b17a0cbafc1d68f77f2fc11009dcd5", size = 1361961 }, + { url = "https://files.pythonhosted.org/packages/5c/4a/29916c0dd853d22ef7b988af43f4e34d327581e16f60b4c9b0f229fa306c/rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25398d9ac7294e99876a3027ffc52c6bebeb2d702b1895af6ae9c541ee676702", size = 1354313 }, + { url = "https://files.pythonhosted.org/packages/41/39/f352af4ede7faeeea20bae2537f1fa60c3bbbf2696f0f2f3dda696745239/rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a52eea839e4bdc72c5e60a444d26004da00bb5bc6301e99b3dde18212e41465", size = 5478019 }, + { url = "https://files.pythonhosted.org/packages/99/8e/86f8a11ac0edda63ff5314d992aa1576fff5d8233f4310d46a6bb0551122/rapidfuzz-3.11.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c87319b0ab9d269ab84f6453601fd49b35d9e4a601bbaef43743f26fabf496c", size = 3056881 }, + { url = "https://files.pythonhosted.org/packages/98/53/222dceb24a83c7d7d76086b6d5bfd3d6aa9988ea73d356d287b5c437c0d5/rapidfuzz-3.11.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3048c6ed29d693fba7d2a7caf165f5e0bb2b9743a0989012a98a47b975355cca", size = 1543944 }, +] + [[package]] name = "referencing" version = "0.36.1" @@ -3078,6 +3419,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3549,11 +3899,11 @@ wheels = [ [[package]] name = "tzdata" -version = "2024.2" +version = "2025.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 } +sdist = { url = "https://files.pythonhosted.org/packages/43/0f/fa4723f22942480be4ca9527bbde8d43f6c3f2fe8412f00e7f5f6746bc8b/tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694", size = 194950 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, + { url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762 }, ] [[package]] @@ -3735,6 +4085,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71", size = 2335872 }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486 }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777 }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314 }, + { url = 
"https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947 }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778 }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716 }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548 }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334 }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427 }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774 }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = 
"https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, + { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721 }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899 }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222 }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707 }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685 }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567 }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672 }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865 }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = 
"https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = 
"sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] + [[package]] name = "xlsxwriter" version = "3.2.0"