From d0fead4e6fbb33d2c43028c61bfff373246405a7 Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Mon, 17 Jun 2024 21:35:02 +0300 Subject: [PATCH 1/6] feat: make incognito work --- .vscode/launch.json | 19 +++++++ README.md | 10 ++-- main.py | 5 -- requirements.txt | 1 - server.py | 13 ++--- src/loader.py | 115 +++++++----------------------------------- src/select_model.py | 16 ++++++ src/tree_generator.py | 13 ++--- src/watch_utils.py | 21 ++++---- 9 files changed, 81 insertions(+), 132 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 src/select_model.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..a0bbaa3 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: FastAPI", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": [ + "server:app", + "--reload" + ], + "jinja": true + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md index e7c39c7..28ce035 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,11 @@ LlamaFS runs in two "modes" - as a batch job (batch mode), and an interactive da In batch mode, you can send a directory to LlamaFS, and it will return a suggested file structure and organize your files. -In watch mode, LlamaFS starts a daemon that watches your directory. It intercepts all filesystem operations and uses your most recent edits to proactively learn how you rename file. For example, if you create a folder for your 2023 tax documents, and start moving 1-3 files in it, LlamaFS will automatically create and move the files for you! +In watch mode, LlamaFS starts a daemon that watches your directory. It intercepts all filesystem operations and uses your most recent edits to proactively learn how you rename file. For example, if you create a folder for your 2023 tax documents, and start moving 1-3 files in it, LlamaFS will automatically create and move the files for you! (watch mode defaults to sending files to groq if you have the environment variable "GROQ_API_KEY" set, otherwise through ollama) Uh... Sending all my personal files to an API provider?! No thank you! -It also has a toggle for "incognito mode," allowing you route every request through Ollama instead of Groq. Since they use the same Llama 3 model, the perform identically. +BREAKING CHANGE: Now by default, llama-fs uses "incognito mode" (if you have not configured an environment key for "GROQ_API_KEY") allowing you route every request through Ollama instead of Groq. Since they use the same Llama 3 model, the perform identically. To use a different model, set the environment variable "MODEL" to a string which litellm can use as a model like "ollama/llama3" or "groq/llama3-70b-8192". ## How we built it @@ -42,7 +42,7 @@ We built LlamaFS on a Python backend, leveraging the Llama3 model through Groq f ### Prerequisites Before installing, ensure you have the following requirements: -- Python 3.10 or higher +- Python 3.9 or higher - pip (Python package installer) ### Installing @@ -63,11 +63,11 @@ To install the project, follow these steps: pip install -r requirements.txt ``` -4. (Optional) Install moondream if you - want to use the incognito mode +4. 
Install ollama and pull the model moondream if you want to recognize images ```bash ollama pull moondream ``` + We highly recommend pulling an additional model like llama3 for local ai inference on text files. You can control which ollama model is used by setting the "MODEL" environment variable to a litellm compatible model string. ## Usage diff --git a/main.py b/main.py index 145c433..3924039 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,6 @@ import json import argparse import pathlib -from groq import Groq from llama_index.core import SimpleDirectoryReader import colorama import pathlib @@ -24,10 +23,6 @@ @click.argument("dst_path", type=click.Path()) @click.option("--auto-yes", is_flag=True, help="Automatically say yes to all prompts") def main(src_path, dst_path, auto_yes=False): - os.environ["GROQ_API_KEY"] = ( - "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" - ) - summaries = asyncio.run(get_dir_summaries(src_path)) # Get file tree diff --git a/requirements.txt b/requirements.txt index fbd6dea..c0b3c3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,6 @@ ollama chromadb llama-index litellm -groq docx2txt colorama termcolor diff --git a/server.py b/server.py index 5d45c32..36b0770 100644 --- a/server.py +++ b/server.py @@ -10,14 +10,12 @@ import colorama -import ollama import threading from asciitree import LeftAligned from asciitree.drawing import BOX_LIGHT, BoxStyle from fastapi import FastAPI, HTTPException from fastapi.responses import StreamingResponse from fastapi.middleware.cors import CORSMiddleware -from groq import Groq from llama_index.core import SimpleDirectoryReader from pydantic import BaseModel from termcolor import colored @@ -28,13 +26,10 @@ from src.watch_utils import Handler from src.watch_utils import create_file_tree as create_watch_file_tree -os.environ["GROQ_API_KEY"] = "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" - - class Request(BaseModel): path: Optional[str] = None instruction: Optional[str] = None - incognito: Optional[bool] = False + incognito: Optional[bool] = True class CommitRequest(BaseModel): @@ -71,9 +66,9 @@ async def batch(request: Request): raise HTTPException( status_code=400, detail="Path does not exist in filesystem") - summaries = await get_dir_summaries(path) + summaries = await get_dir_summaries(path, incognito=request.incognito) # Get file tree - files = create_file_tree(summaries) + files = create_file_tree(summaries, incognito=request.incognito) # Recursively create dictionary from file paths tree = {} @@ -107,7 +102,7 @@ async def watch(request: Request): observer = Observer() event_handler = Handler(path, create_watch_file_tree, response_queue) - await event_handler.set_summaries() + await event_handler.set_summaries(incognito=request.incognito) observer.schedule(event_handler, path, recursive=True) observer.start() diff --git a/src/loader.py b/src/loader.py index 2168b3b..df3b2a3 100644 --- a/src/loader.py +++ b/src/loader.py @@ -1,26 +1,30 @@ import asyncio +import http +import http.server import json import os from collections import defaultdict import agentops import colorama -import ollama import weave -from groq import AsyncGroq, Groq +import litellm +import ollama from llama_index.core import Document, SimpleDirectoryReader from llama_index.core.schema import ImageDocument from llama_index.core.node_parser import TokenTextSplitter from termcolor import colored +from src import select_model + # @weave.op() # @agentops.record_function("summarize") -async def get_dir_summaries(path: str): 
+async def get_dir_summaries(path: str, incognito=True): doc_dicts = load_documents(path) # metadata = process_metadata(doc_dicts) - summaries = await get_summaries(doc_dicts) + summaries = await get_summaries(doc_dicts, incognito=incognito) # Convert path to relative path for summary in summaries: @@ -90,7 +94,7 @@ def process_metadata(doc_dicts): return metadata_list -async def summarize_document(doc, client): +async def summarize_document(doc, incognito = True): PROMPT = """ You will be provided with the contents of a file along with its metadata. Provide a summary of the contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible. @@ -108,12 +112,12 @@ async def summarize_document(doc, client): attempt = 0 while attempt < max_retries: try: - chat_completion = await client.chat.completions.create( + chat_completion = litellm.completion( messages=[ {"role": "system", "content": PROMPT}, {"role": "user", "content": json.dumps(doc)}, ], - model="llama3-70b-8192", + model=select_model(incognito), response_format={"type": "json_object"}, temperature=0, ) @@ -135,7 +139,7 @@ async def summarize_document(doc, client): return summary -async def summarize_image_document(doc: ImageDocument, client): +async def summarize_image_document(doc: ImageDocument): PROMPT = """ You will be provided with an image along with its metadata. Provide a summary of the image contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible. @@ -152,7 +156,6 @@ async def summarize_image_document(doc: ImageDocument, client): client = ollama.AsyncClient() chat_completion = await client.chat( messages=[ - # {"role": "system", "content": "Respond with one short sentence."}, { "role": "user", "content": "Summarize the contents of this image.", @@ -162,7 +165,6 @@ async def summarize_image_document(doc: ImageDocument, client): model="moondream", # format="json", # stream=True, - options={"num_predict": 128}, ) summary = { @@ -176,21 +178,18 @@ async def summarize_image_document(doc: ImageDocument, client): return summary -async def dispatch_summarize_document(doc, client): +async def dispatch_summarize_document(doc, incognito=True): if isinstance(doc, ImageDocument): - return await summarize_image_document(doc, client) + return await summarize_image_document(doc) elif isinstance(doc, Document): - return await summarize_document({"content": doc.text, **doc.metadata}, client) + return await summarize_document({"content": doc.text, **doc.metadata}, incognito=incognito) else: raise ValueError("Document type not supported") -async def get_summaries(documents): - client = AsyncGroq( - api_key=os.environ.get("GROQ_API_KEY"), - ) +async def get_summaries(documents, incognito=True): summaries = await asyncio.gather( - *[dispatch_summarize_document(doc, client) for doc in documents] + *[dispatch_summarize_document(doc, incognito=incognito) for doc in documents] ) return summaries @@ -219,88 +218,12 @@ def merge_summary_documents(summaries, metadata_list): ################################################################################################ -def get_file_summary(path: str): - client = Groq( - api_key=os.environ.get("GROQ_API_KEY"), - ) +def get_file_summary(path: str, incognito=True): reader = SimpleDirectoryReader(input_files=[path]).iter_data() docs = next(reader) 
splitter = TokenTextSplitter(chunk_size=6144) text = splitter.split_text("\n".join([d.text for d in docs]))[0] doc = Document(text=text, metadata=docs[0].metadata) - summary = dispatch_summarize_document_sync(doc, client) - return summary - - -def dispatch_summarize_document_sync(doc, client): - if isinstance(doc, ImageDocument): - return summarize_image_document_sync(doc, client) - elif isinstance(doc, Document): - return summarize_document_sync({"content": doc.text, **doc.metadata}, client) - else: - raise ValueError("Document type not supported") - - -def summarize_document_sync(doc, client): - PROMPT = """ -You will be provided with the contents of a file along with its metadata. Provide a summary of the contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible. - -Write your response a JSON object with the following schema: - -```json -{ - "file_path": "path to the file including name", - "summary": "summary of the content" -} -``` -""".strip() - - chat_completion = client.chat.completions.create( - messages=[ - {"role": "system", "content": PROMPT}, - {"role": "user", "content": json.dumps(doc)}, - ], - model="llama3-70b-8192", - response_format={"type": "json_object"}, - temperature=0, - ) - summary = json.loads(chat_completion.choices[0].message.content) - - try: - print(colored(summary["file_path"], "green")) # Print the filename in green - print(summary["summary"]) # Print the summary of the contents - print("-" * 80 + "\n") # Print a separator line with spacing for readability - except KeyError as e: - print(e) - print(summary) - - return summary - - -def summarize_image_document_sync(doc: ImageDocument, client): - client = ollama.Client() - chat_completion = client.chat( - messages=[ - { - "role": "user", - "content": "Summarize the contents of this image.", - "images": [doc.image_path], - }, - ], - model="moondream", - # format="json", - # stream=True, - options={"num_predict": 128}, - ) - - summary = { - "file_path": doc.image_path, - "summary": chat_completion["message"]["content"], - } - - print(colored(summary["file_path"], "green")) # Print the filename in green - print(summary["summary"]) # Print the summary of the contents - print("-" * 80 + "\n") # Print a separator line with spacing for readability - + summary = dispatch_summarize_document(doc, incognito=incognito) return summary diff --git a/src/select_model.py b/src/select_model.py new file mode 100644 index 0000000..e23d19a --- /dev/null +++ b/src/select_model.py @@ -0,0 +1,16 @@ +from os import environ +from litellm import validate_environment +import warnings + +def select_model(incognito=True): + model = "groq/llama3-70b-8192" if environ.get("GROQ_API_KEY") and incognito is False else environ.get("MODEL", "ollama/llama3") + litellm_validation = validate_environment(model) + if litellm_validation.get('keys_in_environment') is False: + raise EnvironmentError({ + "errno": 1, + "strerr": f"missing environment variables for model {model}", + "missing_keys": ','.join(litellm_validation.get("missing_keys")) + }) + if "ollama" not in model: + warnings.warn(f"sending the contents of your files to {model}!") + return model \ No newline at end of file diff --git a/src/tree_generator.py b/src/tree_generator.py index 13a39d1..3fd20f7 100644 --- a/src/tree_generator.py +++ b/src/tree_generator.py @@ -1,7 +1,9 @@ -from groq import Groq +import litellm import json import os +from src import 
select_model + FILE_PROMPT = """ You will be provided with list of source files and a summary of their contents. For each file, propose a new path and filename, using a directory structure that optimally organizes the files using known conventions and best practices. Follow good naming conventions. Here are a few guidelines @@ -27,15 +29,14 @@ """.strip() -def create_file_tree(summaries: list): - client = Groq(api_key=os.environ.get("GROQ_API_KEY")) - chat_completion = client.chat.completions.create( +def create_file_tree(summaries: list, incognito=True): + chat_completion = litellm.completion( messages=[ {"role": "system", "content": FILE_PROMPT}, {"role": "user", "content": json.dumps(summaries)}, ], - model="llama3-70b-8192", - response_format={"type": "json_object"}, # Uncomment if needed + model=select_model(incognito), + response_format={"type": "json_object"}, temperature=0, ) diff --git a/src/watch_utils.py b/src/watch_utils.py index 8eaa99f..053bf23 100644 --- a/src/watch_utils.py +++ b/src/watch_utils.py @@ -3,26 +3,28 @@ import os import time -from groq import Groq +import litellm from watchdog.events import FileSystemEvent, FileSystemEventHandler from watchdog.observers import Observer +from src import select_model from src.loader import get_dir_summaries, get_file_summary -os.environ["GROQ_API_KEY"] = "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" - +watch_incognito = False class Handler(FileSystemEventHandler): - def __init__(self, base_path, callback, queue): + def __init__(self, base_path, callback, queue, incognito): self.base_path = base_path self.callback = callback self.queue = queue + watch_incognito = incognito self.events = [] print(f"Watching directory {base_path}") - async def set_summaries(self): + async def set_summaries(self, incognito=False): print(f"Getting summaries for {self.base_path}") - self.summaries = await get_dir_summaries(self.base_path) + watch_incognito = incognito + self.summaries = await get_dir_summaries(self.base_path, incognito=incognito) self.summaries_cache = {s["file_path"]: s for s in self.summaries} def update_summary(self, file_path): @@ -31,7 +33,7 @@ def update_summary(self, file_path): if not os.path.exists(path): self.summaries_cache.pop(file_path) return - self.summaries_cache[file_path] = get_file_summary(path) + self.summaries_cache[file_path] = get_file_summary(path, watch_incognito) self.summaries = list(self.summaries_cache.values()) self.queue.put( { @@ -109,15 +111,14 @@ def create_file_tree(summaries, fs_events): Include the above items in your response exactly as is, along all other proposed changes. 
""".strip() - client = Groq() - cmpl = client.chat.completions.create( + cmpl = litellm.completion( messages=[ {"content": FILE_PROMPT, "role": "system"}, {"content": json.dumps(summaries), "role": "user"}, {"content": WATCH_PROMPT, "role": "system"}, {"content": json.dumps(fs_events), "role": "user"}, ], - model="llama3-70b-8192", + model=select_model(watch_incognito), response_format={"type": "json_object"}, temperature=0, ) From f317521ab1d4c587fec4f9939db2bb873df28495 Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Mon, 17 Jun 2024 19:56:42 +0000 Subject: [PATCH 2/6] fix: import --- src/loader.py | 5 ++--- src/tree_generator.py | 2 +- src/watch_utils.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/loader.py b/src/loader.py index df3b2a3..9c83e84 100644 --- a/src/loader.py +++ b/src/loader.py @@ -14,8 +14,7 @@ from llama_index.core.schema import ImageDocument from llama_index.core.node_parser import TokenTextSplitter from termcolor import colored - -from src import select_model +from src.select_model import select_model # @weave.op() @@ -123,7 +122,7 @@ async def summarize_document(doc, incognito = True): ) break except Exception as e: - print("Error status {}".format(e.status_code)) + print(e) attempt += 1 summary = json.loads(chat_completion.choices[0].message.content) diff --git a/src/tree_generator.py b/src/tree_generator.py index 3fd20f7..3152ec1 100644 --- a/src/tree_generator.py +++ b/src/tree_generator.py @@ -2,7 +2,7 @@ import json import os -from src import select_model +from src.select_model import select_model FILE_PROMPT = """ You will be provided with list of source files and a summary of their contents. For each file, propose a new path and filename, using a directory structure that optimally organizes the files using known conventions and best practices. diff --git a/src/watch_utils.py b/src/watch_utils.py index 053bf23..bdd2761 100644 --- a/src/watch_utils.py +++ b/src/watch_utils.py @@ -7,7 +7,7 @@ from watchdog.events import FileSystemEvent, FileSystemEventHandler from watchdog.observers import Observer -from src import select_model +from src.select_model import select_model from src.loader import get_dir_summaries, get_file_summary watch_incognito = False From 8b6367b15e530c5b846386767c9aa9b2186af2f0 Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Mon, 17 Jun 2024 20:06:06 +0000 Subject: [PATCH 3/6] fix: --- .vscode/launch.json | 4 ++++ README.md | 1 + 2 files changed, 5 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index a0bbaa3..c60b7f2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -13,6 +13,10 @@ "server:app", "--reload" ], + "env": { + "OLLAMA_API_BASE": "http://localhost:11434", + "MODEL": "ollama/tinyllama" + }, "jinja": true } ] diff --git a/README.md b/README.md index 28ce035..15f54d0 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,7 @@ To install the project, follow these steps: ollama pull moondream ``` We highly recommend pulling an additional model like llama3 for local ai inference on text files. You can control which ollama model is used by setting the "MODEL" environment variable to a litellm compatible model string. +5. 
Setup the environment variables for MODEL OLLAMA_API_BASE and whatever api keys you need ## Usage From 076969443df6b2974e82546ddd04664d48d15b9b Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Tue, 18 Jun 2024 00:17:22 +0300 Subject: [PATCH 4/6] fix: wont output json like this --- src/loader.py | 29 ++++++++++++----------------- src/tree_generator.py | 4 +++- src/watch_utils.py | 3 ++- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/loader.py b/src/loader.py index 9c83e84..34a8c7e 100644 --- a/src/loader.py +++ b/src/loader.py @@ -105,25 +105,20 @@ async def summarize_document(doc, incognito = True): "summary": "summary of the content" } ``` + +only return the json, no chit chat """.strip() - max_retries = 5 - attempt = 0 - while attempt < max_retries: - try: - chat_completion = litellm.completion( - messages=[ - {"role": "system", "content": PROMPT}, - {"role": "user", "content": json.dumps(doc)}, - ], - model=select_model(incognito), - response_format={"type": "json_object"}, - temperature=0, - ) - break - except Exception as e: - print(e) - attempt += 1 + + chat_completion = litellm.completion( + messages=[ + {"role": "system", "content": PROMPT}, + {"role": "user", "content": json.dumps(doc)}, + ], + model=select_model(incognito), + temperature=0, + max_retries=5, + ) summary = json.loads(chat_completion.choices[0].message.content) diff --git a/src/tree_generator.py b/src/tree_generator.py index 3152ec1..ebd9d60 100644 --- a/src/tree_generator.py +++ b/src/tree_generator.py @@ -26,6 +26,8 @@ ] } ``` + +only return the json, no chit chat """.strip() @@ -36,8 +38,8 @@ def create_file_tree(summaries: list, incognito=True): {"role": "user", "content": json.dumps(summaries)}, ], model=select_model(incognito), - response_format={"type": "json_object"}, temperature=0, + max_retries=5, ) file_tree = json.loads(chat_completion.choices[0].message.content)["files"] diff --git a/src/watch_utils.py b/src/watch_utils.py index bdd2761..2ef57d8 100644 --- a/src/watch_utils.py +++ b/src/watch_utils.py @@ -109,6 +109,7 @@ def create_file_tree(summaries, fs_events): ``` Include the above items in your response exactly as is, along all other proposed changes. +Only return json, no chit chat. 
""".strip() cmpl = litellm.completion( @@ -119,7 +120,7 @@ def create_file_tree(summaries, fs_events): {"content": json.dumps(fs_events), "role": "user"}, ], model=select_model(watch_incognito), - response_format={"type": "json_object"}, temperature=0, + max_retries=5, ) return json.loads(cmpl.choices[0].message.content)["files"] From 9f230a917a2018f6194769d12e464cfcf466fa6b Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Tue, 18 Jun 2024 00:35:09 +0300 Subject: [PATCH 5/6] fix: --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 36b0770..823a7cf 100644 --- a/server.py +++ b/server.py @@ -101,7 +101,7 @@ async def watch(request: Request): response_queue = queue.Queue() observer = Observer() - event_handler = Handler(path, create_watch_file_tree, response_queue) + event_handler = Handler(path, create_watch_file_tree, response_queue, incognito=request.incognito) await event_handler.set_summaries(incognito=request.incognito) observer.schedule(event_handler, path, recursive=True) observer.start() From 238dbbb288e8112617b53d3fb6b4307695bfd5df Mon Sep 17 00:00:00 2001 From: Michael Salaverry Date: Tue, 18 Jun 2024 09:05:13 +0300 Subject: [PATCH 6/6] feat: choose your own image model refactor: remove ollama direct usage in favor of litellm direct usage bug: didn't work on the full sample data, kept looping on the pdf --- README.md | 2 +- src/loader.py | 28 ++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 15f54d0..5fd6ef2 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ In watch mode, LlamaFS starts a daemon that watches your directory. It intercept Uh... Sending all my personal files to an API provider?! No thank you! -BREAKING CHANGE: Now by default, llama-fs uses "incognito mode" (if you have not configured an environment key for "GROQ_API_KEY") allowing you route every request through Ollama instead of Groq. Since they use the same Llama 3 model, the perform identically. To use a different model, set the environment variable "MODEL" to a string which litellm can use as a model like "ollama/llama3" or "groq/llama3-70b-8192". +BREAKING CHANGE: Now by default, llama-fs uses "incognito mode" (if you have not configured an environment key for "GROQ_API_KEY") allowing you route every request through Ollama instead of Groq. Since they use the same Llama 3 model, the perform identically. To use a different model, set the environment variable "MODEL" to a string which litellm can use as a model like "ollama/llama3" or "groq/llama3-70b-8192". Additionally, you can pick your image model by setting the "IMAGE_MODEL" environment variable to something like "ollama/moondream" or "gpt-4o" (defaults to moondream). 
## How we built it diff --git a/src/loader.py b/src/loader.py index 34a8c7e..2504269 100644 --- a/src/loader.py +++ b/src/loader.py @@ -3,6 +3,7 @@ import http.server import json import os +import base64 from collections import defaultdict import agentops @@ -132,6 +133,10 @@ async def summarize_document(doc, incognito = True): return summary +def convert_image_to_base64(path: str, file_type: str) -> str: + with open(path, 'rb') as image_bytes: + base64_image = base64.b64encode(image_bytes.read()).decode("utf-8") + return f"data:{file_type};base64,{base64_image}" async def summarize_image_document(doc: ImageDocument): PROMPT = """ @@ -147,23 +152,30 @@ async def summarize_image_document(doc: ImageDocument): ``` """.strip() - client = ollama.AsyncClient() - chat_completion = await client.chat( + chat_completion = litellm.completion( messages=[ { "role": "user", - "content": "Summarize the contents of this image.", - "images": [doc.image_path], + "content": [ + { + "type": "text", + "text": "Summarize the contents of this image." + }, + { + "type": "image_url", + "image_url": { + "url": convert_image_to_base64(doc.image_path, doc.extra_info.get('file_type')) + } + } + ], }, ], - model="moondream", - # format="json", - # stream=True, + model=os.environ.get('IMAGE_MODEL') if os.environ.get('IMAGE_MODEL') is not None else "ollama/moondream", ) summary = { "file_path": doc.image_path, - "summary": chat_completion["message"]["content"], + "summary": chat_completion.choices[0].message.content, } print(colored(summary["file_path"], "green")) # Print the filename in green
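
Taken together, these patches let the core helpers run end to end without a Groq key. Below is a minimal sketch of the incognito-by-default flow from a Python shell; it is illustrative only — `./sample_data` is a placeholder directory, and it assumes a local Ollama server is running with a text model pulled (e.g. `ollama pull llama3`).

```python
# Sketch only: exercises get_dir_summaries/create_file_tree with the new
# incognito default. "./sample_data" is a placeholder path.
import asyncio
import json

from src.loader import get_dir_summaries
from src.tree_generator import create_file_tree

summaries = asyncio.run(get_dir_summaries("./sample_data", incognito=True))
# With incognito=True, select_model() resolves to MODEL or "ollama/llama3",
# so nothing is sent to Groq unless MODEL points at a remote provider.
files = create_file_tree(summaries, incognito=True)
print(json.dumps(files, indent=2))
```

Passing `incognito=False` (or sending `"incognito": false` to the server) restores the Groq path when `GROQ_API_KEY` is set.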