Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
akshayballal95 committed Nov 17, 2024
2 parents 9e9710a + 04af910 commit 8893ad1
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 5 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<div align="center">

<p align="center">
<b>Generate and stream embeddings with minimalist and lightning fast framework built in rust 🦀</b>
<b>🦀 Rust-powered Framework for Lightning-Fast End-to-End Embedding: From Source to VectorDB</b>
<br />
<a href="https://starlightsearch.github.io/EmbedAnything/references/"><strong>Explore the docs »</strong></a>
<br />
Expand All @@ -38,7 +38,7 @@
</div>


EmbedAnything is a minimalist yet highly performant, lightweight, lightening fast, multisource, multimodal and local embedding pipeline, built in rust. Whether you're working with text, images, audio, PDFs, websites, or other media, EmbedAnything simplifies the process of generating embeddings from various sources and streaming them to a vector database.We support dense, sparse and late-interaction embeddings.
EmbedAnything is a minimalist, highly performant, lightning-fast, lightweight, multisource, multimodal, and local embedding pipeline built in Rust. Whether you're working with text, images, audio, PDFs, websites, or other media, EmbedAnything streamlines the process of generating embeddings from various sources and seamlessly streaming them to a vector database (memory-efficient indexing). It supports dense, sparse, and late-interaction embeddings, offering flexibility for a wide range of use cases.

<!-- TABLE OF CONTENTS -->
<details>
Expand Down
2 changes: 1 addition & 1 deletion examples/colpali.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Load ONNX Model
model: ColpaliModel = ColpaliModel.from_pretrained_onnx(
"akshayballal/colpali-v1.2-merged-onnx", None
"starlight-ai/colpali-v1.2-merged-onnx", None
)

# Get all PDF files in the directory
Expand Down
95 changes: 93 additions & 2 deletions examples/notebooks/colpali.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,61 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: embed-anything-gpu in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (0.4.14)\n",
"Requirement already satisfied: onnxruntime-gpu==1.19.2 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from embed-anything-gpu) (1.19.2)\n",
"Requirement already satisfied: coloredlogs in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (15.0.1)\n",
"Requirement already satisfied: flatbuffers in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (24.3.25)\n",
"Requirement already satisfied: numpy>=1.21.6 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (2.1.1)\n",
"Requirement already satisfied: packaging in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (24.1)\n",
"Requirement already satisfied: protobuf in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (3.20.3)\n",
"Requirement already satisfied: sympy in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from onnxruntime-gpu==1.19.2->embed-anything-gpu) (1.12)\n",
"Requirement already satisfied: humanfriendly>=9.1 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from coloredlogs->onnxruntime-gpu==1.19.2->embed-anything-gpu) (10.0)\n",
"Requirement already satisfied: mpmath>=0.19 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from sympy->onnxruntime-gpu==1.19.2->embed-anything-gpu) (1.3.0)\n",
"Requirement already satisfied: pyreadline3 in c:\\users\\sonam\\anaconda3\\envs\\unsloth\\lib\\site-packages (from humanfriendly>=9.1->coloredlogs->onnxruntime-gpu==1.19.2->embed-anything-gpu) (3.5.4)\n"
]
}
],
"source": [
"!pip install embed-anything-gpu"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting tabulate\n",
" Using cached tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)\n",
"Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)\n",
"Installing collected packages: tabulate\n",
"Successfully installed tabulate-0.9.0\n"
]
}
],
"source": [
"!pip install tabulate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## To Run ONNX "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
Expand All @@ -13,7 +68,43 @@
"from pathlib import Path\n",
"from PIL import Image\n",
"import io\n",
"import torch\n",
"\n",
"model: ColpaliModel = ColpaliModel.from_pretrained_onnx(\"starlight-ai/colpali-v1.2-merged-onnx\", None)\n",
"\n",
"def process_pdf(files, model:ColpaliModel):\n",
" file_embed_data: list[EmbedData] = []\n",
" for file in files:\n",
" try:\n",
" embedding: list[EmbedData] = model.embed_file(str(file), batch_size=1)\n",
" file_embed_data.extend(embedding) \n",
" except Exception as e:\n",
" print(f\"Error embedding file {file}: {e}\")\n",
"\n",
" return file_embed_data\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## To Run it on Candle"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"from embed_anything import EmbedData, ColpaliModel\n",
"import numpy as np\n",
"from tabulate import tabulate\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"import io\n",
"import torch\n",
"\n",
"model: ColpaliModel = ColpaliModel.from_pretrained(\"vidore/colpali-v1.2-merged\", None)\n",
"\n",
Expand Down Expand Up @@ -320,7 +411,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "computervision",
"display_name": "unsloth",
"language": "python",
"name": "python3"
},
Expand All @@ -334,7 +425,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 8893ad1

Please sign in to comment.