Skip to content

Commit

Permalink
Merge pull request #103 from boswelja/embeder-to-embedder
Browse files Browse the repository at this point in the history
Embeder -> Embedder
  • Loading branch information
sonam-pankaj95 authored Dec 20, 2024
2 parents 9ad352c + fe6e73e commit 6f82548
Show file tree
Hide file tree
Showing 29 changed files with 168 additions and 168 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ model = EmbeddingModel.from_pretrained_hf(
WhichModel.Bert, model_id="model link from huggingface"
)
config = TextEmbedConfig(chunk_size=200, batch_size=32)
-data = embed_anything.embed_file("file_address", embeder=model, config=config)
+data = embed_anything.embed_file("file_address", embedder=model, config=config)
```


Expand Down Expand Up @@ -190,7 +190,7 @@ pip install embed-anything-gpu
model = EmbeddingModel.from_pretrained_local(
WhichModel.Bert, model_id="Hugging_face_link"
)
-data = embed_anything.embed_file("test_files/test.pdf", embeder=model)
+data = embed_anything.embed_file("test_files/test.pdf", embedder=model)
```


Expand All @@ -206,11 +206,11 @@ model = embed_anything.EmbeddingModel.from_pretrained_local(
model_id="openai/clip-vit-base-patch16",
# revision="refs/pr/15",
)
-data: list[EmbedData] = embed_anything.embed_directory("test_files", embeder=model)
+data: list[EmbedData] = embed_anything.embed_directory("test_files", embedder=model)
embeddings = np.array([data.embedding for data in data])
query = ["Photo of a monkey?"]
query_embedding = np.array(
-embed_anything.embed_query(query, embeder=model)[0].embedding
+embed_anything.embed_query(query, embedder=model)[0].embedding
)
similarities = np.dot(embeddings, query_embedding)
max_index = np.argmax(similarities)
Expand All @@ -233,7 +233,7 @@ from embed_anything import (
audio_decoder = AudioDecoderModel.from_pretrained_hf(
"openai/whisper-tiny.en", revision="main", model_type="tiny-en", quantized=False
)
-embeder = EmbeddingModel.from_pretrained_hf(
+embedder = EmbeddingModel.from_pretrained_hf(
embed_anything.WhichModel.Bert,
model_id="sentence-transformers/all-MiniLM-L6-v2",
revision="main",
Expand All @@ -242,7 +242,7 @@ config = TextEmbedConfig(chunk_size=200, batch_size=32)
data = embed_anything.embed_audio_file(
"test_files/audio/samples_hp0.wav",
audio_decoder=audio_decoder,
-embeder=embeder,
+embedder=embedder,
text_embed_config=config,
)
print(data[0].metadata)
Expand Down
2 changes: 1 addition & 1 deletion docs/blog/posts/embed-anything.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ model = EmbeddingModel.from_pretrained_hf(
WhichModel.Bert, model_id="model link from huggingface"
)
config = TextEmbedConfig(chunk_size=200, batch_size=32)
-data = embed_anything.embed_file("file_address", embeder=model, config=config)
+data = embed_anything.embed_file("file_address", embedder=model, config=config)
```
You can check out the documentation at https://starlight-search.com/references/

Expand Down
4 changes: 2 additions & 2 deletions docs/blog/posts/vector-streaming.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ model = embed_anything.EmbeddingModel.from_pretrained_cloud(

data = embed_anything.embed_image_directory(
"\image_directory",
-embeder=model,
+embedder=model,
adapter=weaviate_adapter,
config=embed_anything.ImageEmbedConfig(buffer_size=100),
)
Expand All @@ -124,7 +124,7 @@ data = embed_anything.embed_image_directory(
#### Step 4: Query the Vector Database

```python
-query_vector = embed_anything.embed_query(["image of a cat"], embeder=model)[0].embedding
+query_vector = embed_anything.embed_query(["image of a cat"], embedder=model)[0].embedding
```

#### Step 5: Query the Vector Database
Expand Down
8 changes: 4 additions & 4 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pip install embed-anything-gpu
model = EmbeddingModel.from_pretrained_local(
WhichModel.Bert, model_id="sentence-transformers/all-MiniLM-L6-v2"
)
-data = embed_anything.embed_file("test_files/test.pdf", embeder=model)
+data = embed_anything.embed_file("test_files/test.pdf", embedder=model)
```


Expand Down Expand Up @@ -162,11 +162,11 @@ model = embed_anything.EmbeddingModel.from_pretrained_local(
model_id="openai/clip-vit-base-patch16",
# revision="refs/pr/15",
)
-data: list[EmbedData] = embed_anything.embed_directory("test_files", embeder=model)
+data: list[EmbedData] = embed_anything.embed_directory("test_files", embedder=model)
embeddings = np.array([data.embedding for data in data])
query = ["Photo of a monkey?"]
query_embedding = np.array(
-embed_anything.embed_query(query, embeder=model)[0].embedding
+embed_anything.embed_query(query, embedder=model)[0].embedding
)
similarities = np.dot(embeddings, query_embedding)
max_index = np.argmax(similarities)
Expand Down Expand Up @@ -199,7 +199,7 @@ jina_config = JinaConfig(

config = EmbedConfig(jina=jina_config, audio_decoder=audio_decoder_config)
data = embed_anything.embed_file(
-"test_files/audio/samples_hp0.wav", embeder="Audio", config=config
+"test_files/audio/samples_hp0.wav", embedder="Audio", config=config
)
print(data[0].metadata)
end_time = time.time()
Expand Down
2 changes: 1 addition & 1 deletion examples/adapters/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def upsert(self, data: List[Dict]):

data = embed_anything.embed_file(
"/path/to/my-file.pdf",
-embeder="Bert",
+embedder="Bert",
adapter=elasticsearch_adapter,
config=embed_config,
)
Expand Down
2 changes: 1 addition & 1 deletion examples/adapters/pinecone_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def upsert(self, data: List[Dict]):

data = embed_anything.embed_image_directory(
"test_files",
-embeder=clip_model,
+embedder=clip_model,
adapter=pinecone_adapter,
config=embed_config,
)
Expand Down
4 changes: 2 additions & 2 deletions examples/adapters/weaviate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def delete_index(self, index_name: str):


data = embed_anything.embed_directory(
-"test_files", embeder=model, adapter=weaviate_adapter
+"test_files", embedder=model, adapter=weaviate_adapter
)

-query_vector = embed_anything.embed_query(["What is attention"], embeder=model)[
+query_vector = embed_anything.embed_query(["What is attention"], embedder=model)[
0
].embedding

Expand Down
4 changes: 2 additions & 2 deletions examples/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"openai/whisper-tiny.en", revision="main", model_type="tiny-en", quantized=False
)

-embeder = EmbeddingModel.from_pretrained_hf(
+embedder = EmbeddingModel.from_pretrained_hf(
embed_anything.WhichModel.Bert,
model_id="sentence-transformers/all-MiniLM-L6-v2",
revision="main",
Expand All @@ -24,7 +24,7 @@
data = embed_anything.embed_audio_file(
"test_files/audio/samples_hp0.wav",
audio_decoder=audio_decoder,
-embeder=embeder,
+embedder=embedder,
text_embed_config=config,
)
print(data[0].metadata)
Expand Down
4 changes: 2 additions & 2 deletions examples/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
model_id="openai/clip-vit-base-patch16",
)
data: list[EmbedData] = embed_anything.embed_image_directory(
-"test_files", embeder=model
+"test_files", embedder=model
)

# Convert the embeddings to a numpy array
Expand All @@ -22,7 +22,7 @@
# Embed a query
query = ["Photo of a monkey?"]
query_embedding = np.array(
-embed_anything.embed_query(query, embeder=model)[0].embedding
+embed_anything.embed_query(query, embedder=model)[0].embedding
)

# Calculate the similarities between the query embedding and all the embeddings
Expand Down
8 changes: 4 additions & 4 deletions examples/hybridsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,16 @@
WhichModel.Jina, model_id="jinaai/jina-embeddings-v2-small-en"
)

-jina_embedddings = embed_anything.embed_query(sentences, embeder=jina_model)
-jina_query = embed_anything.embed_query(query_text, embeder=jina_model)[0]
+jina_embedddings = embed_anything.embed_query(sentences, embedder=jina_model)
+jina_query = embed_anything.embed_query(query_text, embedder=jina_model)[0]


splade_model = EmbeddingModel.from_pretrained_hf(
WhichModel.SparseBert, "prithivida/Splade_PP_en_v1"
)
-jina_embedddings = embed_anything.embed_query(sentences, embeder=jina_model)
+jina_embedddings = embed_anything.embed_query(sentences, embedder=jina_model)

-splade_query = embed_anything.embed_query(query_text, embeder=splade_model)
+splade_query = embed_anything.embed_query(query_text, embedder=splade_model)

client.query_points(
collection_name="my-hybrid-collection",
Expand Down
2 changes: 1 addition & 1 deletion examples/onnx_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"The dog is sitting in the park",
]

-embedddings = embed_query(sentences, embeder=model)
+embedddings = embed_query(sentences, embedder=model)

embed_vector = np.array([e.embedding for e in embedddings])

Expand Down
2 changes: 1 addition & 1 deletion examples/semantic_chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
semantic_encoder=semantic_encoder,
)

-data = embed_anything.embed_file("test_files/bank.txt", embeder=model, config=config)
+data = embed_anything.embed_file("test_files/bank.txt", embedder=model, config=config)

for d in data:
print(d.text)
Expand Down
2 changes: 1 addition & 1 deletion examples/splade.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"Do you like pizza?",
]

-embedddings = embed_query(sentences, embeder=model)
+embedddings = embed_query(sentences, embedder=model)

embed_vector = np.array([e.embedding for e in embedddings])

Expand Down
8 changes: 4 additions & 4 deletions examples/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def embed_directory_example():

# Embed all files in a directory
data: list[EmbedData] = embed_anything.embed_directory(
-"bench", embeder=model, config=config
+"bench", embedder=model, config=config
)

# End timing
Expand All @@ -39,7 +39,7 @@ def embed_query_example():

# Embed a query
embeddings: EmbedData = embed_anything.embed_query(
-["Hello world my"], embeder=model, config=config
+["Hello world my"], embedder=model, config=config
)[0]

# Print the shape of the embedding
Expand All @@ -48,7 +48,7 @@ def embed_query_example():
# Embed another query and print the result
print(
embed_anything.embed_query(
-["What is the capital of India?"], embeder=model, config=config
+["What is the capital of India?"], embedder=model, config=config
)
)

Expand All @@ -62,7 +62,7 @@ def embed_file_example():

# Embed a single file
data: list[EmbedData] = embed_anything.embed_file(
-"test_files/bank.txt", embeder=model, config=config
+"test_files/bank.txt", embedder=model, config=config
)

# Print the embedded data
Expand Down
2 changes: 1 addition & 1 deletion examples/text_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

data: list[EmbedData] = embed_anything.embed_file(
"/home/akshay/projects/starlaw/src-server/test_files/court.pdf", # Replace with your file path
-embeder=model,
+embedder=model,
config=config,
)
end = time()
Expand Down
2 changes: 1 addition & 1 deletion examples/web.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import embed_anything

-data = embed_anything.embed_webpage("https://www.akshaymakes.com/", embeder="Bert")
+data = embed_anything.embed_webpage("https://www.akshaymakes.com/", embedder="Bert")
12 changes: 6 additions & 6 deletions python/python/embed_anything/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
model = EmbeddingModel.from_pretrained_local(
WhichModel.Bert, model_id="Hugging_face_link"
)
-data = embed_anything.embed_file("test_files/test.pdf", embeder=model)
+data = embed_anything.embed_file("test_files/test.pdf", embedder=model)
#For images
Expand All @@ -30,11 +30,11 @@
model_id="openai/clip-vit-base-patch16",
# revision="refs/pr/15",
)
-data: list[EmbedData] = embed_anything.embed_directory("test_files", embeder=model)
+data: list[EmbedData] = embed_anything.embed_directory("test_files", embedder=model)
embeddings = np.array([data.embedding for data in data])
query = ["Photo of a monkey?"]
query_embedding = np.array(
-embed_anything.embed_query(query, embeder=model)[0].embedding
+embed_anything.embed_query(query, embedder=model)[0].embedding
)
# For audio files
from embed_anything import (
Expand All @@ -47,7 +47,7 @@
audio_decoder = AudioDecoderModel.from_pretrained_hf(
"openai/whisper-tiny.en", revision="main", model_type="tiny-en", quantized=False
)
-embeder = EmbeddingModel.from_pretrained_hf(
+embedder = EmbeddingModel.from_pretrained_hf(
embed_anything.WhichModel.Bert,
model_id="sentence-transformers/all-MiniLM-L6-v2",
revision="main",
Expand All @@ -56,7 +56,7 @@
data = embed_anything.embed_audio_file(
"test_files/audio/samples_hp0.wav",
audio_decoder=audio_decoder,
-embeder=embeder,
+embedder=embedder,
text_embed_config=config,
)
Expand Down Expand Up @@ -98,7 +98,7 @@
data = embed_anything.embed_image_directory(
"test_files",
-embeder=clip_model,
+embedder=clip_model,
adapter=pinecone_adapter,
# config=embed_config,
```
Expand Down
Loading

0 comments on commit 6f82548

Please sign in to comment.