diff --git a/README.md b/README.md
index be56614..efc520a 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@
- Generate and stream embeddings with minimalist and lightning fast framework built in rust 🦀
+ 🦀 Rust-powered Framework for Lightning-Fast End-to-End Embedding: From Source to VectorDB
Explore the docs »
@@ -38,7 +38,7 @@
-EmbedAnything is a minimalist yet highly performant, lightweight, lightening fast, multisource, multimodal and local embedding pipeline, built in rust. Whether you're working with text, images, audio, PDFs, websites, or other media, EmbedAnything simplifies the process of generating embeddings from various sources and streaming them to a vector database.We support dense, sparse and late-interaction embeddings.
+EmbedAnything is a minimalist, highly performant, lightning-fast, lightweight, multisource, multimodal, and local embedding pipeline built in Rust. Whether you're working with text, images, audio, PDFs, websites, or other media, EmbedAnything streamlines the process of generating embeddings from various sources and seamlessly streaming them (with memory-efficient indexing) to a vector database. It supports dense, sparse, and late-interaction embeddings, offering flexibility for a wide range of use cases.
diff --git a/examples/colpali.py b/examples/colpali.py
index c4c7731..7a55903 100644
--- a/examples/colpali.py
+++ b/examples/colpali.py
@@ -9,7 +9,7 @@
# Load ONNX Model
model: ColpaliModel = ColpaliModel.from_pretrained_onnx(
- "akshayballal/colpali-v1.2-merged-onnx", None
+ "starlight-ai/colpali-v1.2-merged-onnx", None
)
# Get all PDF files in the directory
diff --git a/examples/notebooks/colpali.ipynb b/examples/notebooks/colpali.ipynb
index 4b567aa..68cfe7e 100644
--- a/examples/notebooks/colpali.ipynb
+++ b/examples/notebooks/colpali.ipynb
@@ -4,6 +4,61 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: embed-anything-gpu in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (0.4.14)\n",
+ "Requirement already satisfied: onnxruntime-gpu==1.19.2 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from embed-anything-gpu) (1.19.2)\n",
+ "Requirement already satisfied: coloredlogs in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (15.0.1)\n",
+ "Requirement already satisfied: flatbuffers in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (24.3.25)\n",
+ "Requirement already satisfied: numpy>=1.21.6 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (2.1.1)\n",
+ "Requirement already satisfied: packaging in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (24.1)\n",
+ "Requirement already satisfied: protobuf in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (3.20.3)\n",
+ "Requirement already satisfied: sympy in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (1.12)\n",
+ "Requirement already satisfied: humanfriendly>=9.1 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from coloredlogs->onnxruntime-gpu==1.19.2->embed-anything-gpu) (10.0)\n",
+ "Requirement already satisfied: mpmath>=0.19 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from sympy->onnxruntime-gpu==1.19.2->embed-anything-gpu) (1.3.0)\n",
+ "Requirement already satisfied: pyreadline3 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from humanfriendly>=9.1->coloredlogs->onnxruntime-gpu==1.19.2->embed-anything-gpu) (3.5.4)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install embed-anything-gpu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting tabulate\n",
+ " Using cached tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\n",
+ "Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)\n",
+ "Installing collected packages: tabulate\n",
+ "Successfully installed tabulate-0.9.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install tabulate"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## To Run with ONNX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"import base64\n",
@@ -13,7 +68,43 @@
"from pathlib import Path\n",
"from PIL import Image\n",
"import io\n",
+ "import torch\n",
+ "\n",
+ "model: ColpaliModel = ColpaliModel.from_pretrained_onnx(\"starlight-ai/colpali-v1.2-merged-onnx\", None)\n",
+ "\n",
+ "def process_pdf(files, model:ColpaliModel):\n",
+ " file_embed_data: list[EmbedData] = []\n",
+ " for file in files:\n",
+ " try:\n",
+ " embedding: list[EmbedData] = model.embed_file(str(file), batch_size=1)\n",
+ " file_embed_data.extend(embedding) \n",
+ " except Exception as e:\n",
+ " print(f\"Error embedding file {file}: {e}\")\n",
"\n",
+ " return file_embed_data\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## To Run it on Candle"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import base64\n",
+ "from embed_anything import EmbedData, ColpaliModel\n",
+ "import numpy as np\n",
+ "from tabulate import tabulate\n",
+ "from pathlib import Path\n",
+ "from PIL import Image\n",
+ "import io\n",
+ "import torch\n",
"\n",
"model: ColpaliModel = ColpaliModel.from_pretrained(\"vidore/colpali-v1.2-merged\", None)\n",
"\n",
@@ -320,7 +411,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "computervision",
+ "display_name": "unsloth",
"language": "python",
"name": "python3"
},
@@ -334,7 +425,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.11.9"
}
},
"nbformat": 4,