diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..c60b7f2
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,23 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: FastAPI",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "uvicorn",
+            "args": [
+                "server:app",
+                "--reload"
+            ],
+            "env": {
+                "OLLAMA_API_BASE": "http://localhost:11434",
+                "MODEL": "ollama/tinyllama"
+            },
+            "jinja": true
+        }
+    ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index e7c39c7..5fd6ef2 100644
--- a/README.md
+++ b/README.md
@@ -18,11 +18,11 @@ LlamaFS runs in two "modes" - as a batch job (batch mode), and an interactive da
 
 In batch mode, you can send a directory to LlamaFS, and it will return a suggested file structure and organize your files.
 
-In watch mode, LlamaFS starts a daemon that watches your directory. It intercepts all filesystem operations and uses your most recent edits to proactively learn how you rename file. For example, if you create a folder for your 2023 tax documents, and start moving 1-3 files in it, LlamaFS will automatically create and move the files for you!
+In watch mode, LlamaFS starts a daemon that watches your directory. It intercepts all filesystem operations and uses your most recent edits to proactively learn how you rename files. For example, if you create a folder for your 2023 tax documents and start moving 1-3 files into it, LlamaFS will automatically create and move the rest for you! (Watch mode sends files through Groq if the "GROQ_API_KEY" environment variable is set, otherwise through Ollama.)
 
 Uh... Sending all my personal files to an API provider?! No thank you!
 
-It also has a toggle for "incognito mode," allowing you route every request through Ollama instead of Groq. Since they use the same Llama 3 model, the perform identically.
+BREAKING CHANGE: llama-fs now uses "incognito mode" by default (unless you have set the "GROQ_API_KEY" environment variable), routing every request through Ollama instead of Groq. Since both use the same Llama 3 model, they perform identically. To use a different model, set the "MODEL" environment variable to a model string litellm can load, such as "ollama/llama3" or "groq/llama3-70b-8192". You can also pick the image model by setting the "IMAGE_MODEL" environment variable to something like "ollama/moondream" or "gpt-4o" (defaults to moondream).
 
 ## How we built it
 
@@ -42,7 +42,7 @@ We built LlamaFS on a Python backend, leveraging the Llama3 model through Groq f
 ### Prerequisites
 
 Before installing, ensure you have the following requirements:
-- Python 3.10 or higher
+- Python 3.9 or higher
 - pip (Python package installer)
 
 ### Installing
@@ -63,11 +63,12 @@ To install the project, follow these steps:
   pip install -r requirements.txt
  ```
 
-4. (Optional) Install moondream if you
-   want to use the incognito mode
+4. Install Ollama and pull the moondream model if you want image files to be summarized
   ```bash
   ollama pull moondream
  ```
+   We highly recommend pulling an additional model such as llama3 for local AI inference on text files. You can control which Ollama model is used by setting the "MODEL" environment variable to a litellm-compatible model string.
+5. Set up the environment variables "MODEL", "OLLAMA_API_BASE", and whatever API keys your chosen model needs.
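+   For example, a fully local setup could look like this (illustrative values; any litellm-compatible model strings work):
+   ```bash
+   export MODEL="ollama/llama3"
+   export IMAGE_MODEL="ollama/moondream"
+   export OLLAMA_API_BASE="http://localhost:11434"
+   # export GROQ_API_KEY="..."  # only set this if you want requests routed through Groq
+   ```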
 
 ## Usage
diff --git a/main.py b/main.py
index 145c433..3924039 100644
--- a/main.py
+++ b/main.py
@@ -2,7 +2,6 @@
 import json
 import argparse
 import pathlib
-from groq import Groq
 from llama_index.core import SimpleDirectoryReader
 import colorama
 import pathlib
@@ -24,10 +23,6 @@
 @click.argument("dst_path", type=click.Path())
 @click.option("--auto-yes", is_flag=True, help="Automatically say yes to all prompts")
 def main(src_path, dst_path, auto_yes=False):
-    os.environ["GROQ_API_KEY"] = (
-        "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"
-    )
-
     summaries = asyncio.run(get_dir_summaries(src_path))
 
     # Get file tree
diff --git a/requirements.txt b/requirements.txt
index fbd6dea..c0b3c3a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,6 @@ ollama
 chromadb
 llama-index
 litellm
-groq
 docx2txt
 colorama
 termcolor
diff --git a/server.py b/server.py
index 5d45c32..823a7cf 100644
--- a/server.py
+++ b/server.py
@@ -10,14 +10,12 @@
 import colorama
-import ollama
 import threading
 
 from asciitree import LeftAligned
 from asciitree.drawing import BOX_LIGHT, BoxStyle
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
-from groq import Groq
 from llama_index.core import SimpleDirectoryReader
 from pydantic import BaseModel
 from termcolor import colored
@@ -28,13 +26,10 @@
 from src.watch_utils import Handler
 from src.watch_utils import create_file_tree as create_watch_file_tree
 
-os.environ["GROQ_API_KEY"] = "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"
-
-
 class Request(BaseModel):
     path: Optional[str] = None
     instruction: Optional[str] = None
-    incognito: Optional[bool] = False
+    incognito: Optional[bool] = True
 
 
 class CommitRequest(BaseModel):
@@ -71,9 +66,9 @@ async def batch(request: Request):
         raise HTTPException(
             status_code=400, detail="Path does not exist in filesystem")
 
-    summaries = await get_dir_summaries(path)
+    summaries = await get_dir_summaries(path, incognito=request.incognito)
     # Get file tree
-    files = create_file_tree(summaries)
+    files = create_file_tree(summaries, incognito=request.incognito)
 
     # Recursively create dictionary from file paths
     tree = {}
@@ -106,8 +101,8 @@ async def watch(request: Request):
     response_queue = queue.Queue()
 
     observer = Observer()
-    event_handler = Handler(path, create_watch_file_tree, response_queue)
-    await event_handler.set_summaries()
+    event_handler = Handler(path, create_watch_file_tree, response_queue, incognito=request.incognito)
+    await event_handler.set_summaries(incognito=request.incognito)
     observer.schedule(event_handler, path, recursive=True)
     observer.start()
diff --git a/src/loader.py b/src/loader.py
index 2168b3b..2504269 100644
--- a/src/loader.py
+++ b/src/loader.py
@@ -1,26 +1,30 @@
 import asyncio
+import http
+import http.server
 import json
 import os
+import base64
 from collections import defaultdict
 
 import agentops
 import colorama
-import ollama
 import weave
-from groq import AsyncGroq, Groq
+import litellm
+import ollama
 from llama_index.core import Document, SimpleDirectoryReader
 from llama_index.core.schema import ImageDocument
 from llama_index.core.node_parser import TokenTextSplitter
 from termcolor import colored
+from src.select_model import select_model
 
 
 # @weave.op()
 # @agentops.record_function("summarize")
-async def get_dir_summaries(path: str):
+async def get_dir_summaries(path: str, incognito=True):
     doc_dicts = load_documents(path)
     # metadata = process_metadata(doc_dicts)
 
-    summaries = await get_summaries(doc_dicts)
+    summaries = await get_summaries(doc_dicts, incognito=incognito)
 
     # Convert path to relative path
     for summary in summaries:
@@ -90,7 +94,7 @@ def process_metadata(doc_dicts):
     return metadata_list
 
 
-async def summarize_document(doc, client):
+async def summarize_document(doc, incognito=True):
     PROMPT = """
 You will be provided with the contents of a file along with its metadata. Provide a summary of the contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible.
 
@@ -102,25 +106,20 @@ async def summarize_document(doc, client):
     "summary": "summary of the content"
 }
 ```
+
+Only return the JSON, no chit chat.
 """.strip()
-    max_retries = 5
-    attempt = 0
-    while attempt < max_retries:
-        try:
-            chat_completion = await client.chat.completions.create(
-                messages=[
-                    {"role": "system", "content": PROMPT},
-                    {"role": "user", "content": json.dumps(doc)},
-                ],
-                model="llama3-70b-8192",
-                response_format={"type": "json_object"},
-                temperature=0,
-            )
-            break
-        except Exception as e:
-            print("Error status {}".format(e.status_code))
-            attempt += 1
+
+    chat_completion = litellm.completion(
+        messages=[
+            {"role": "system", "content": PROMPT},
+            {"role": "user", "content": json.dumps(doc)},
+        ],
+        model=select_model(incognito),
+        temperature=0,
+        max_retries=5,
+    )
 
     summary = json.loads(chat_completion.choices[0].message.content)
 
@@ -134,8 +133,12 @@ async def summarize_document(doc, client):
     return summary
 
 
+def convert_image_to_base64(path: str, file_type: str) -> str:
+    with open(path, 'rb') as image_bytes:
+        base64_image = base64.b64encode(image_bytes.read()).decode("utf-8")
+    return f"data:{file_type};base64,{base64_image}"
 
-async def summarize_image_document(doc: ImageDocument, client):
+async def summarize_image_document(doc: ImageDocument):
     PROMPT = """
 You will be provided with an image along with its metadata. Provide a summary of the image contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible.
 
@@ -149,25 +152,30 @@ async def summarize_image_document(doc: ImageDocument, client):
 ```
 """.strip()
 
-    client = ollama.AsyncClient()
-    chat_completion = await client.chat(
+    chat_completion = litellm.completion(
         messages=[
-            # {"role": "system", "content": "Respond with one short sentence."},
             {
                 "role": "user",
-                "content": "Summarize the contents of this image.",
-                "images": [doc.image_path],
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Summarize the contents of this image."
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": convert_image_to_base64(doc.image_path, doc.extra_info.get('file_type'))
+                        }
+                    }
+                ],
             },
         ],
-        model="moondream",
-        # format="json",
-        # stream=True,
-        options={"num_predict": 128},
+        model=os.environ.get('IMAGE_MODEL', "ollama/moondream"),
    )
 
     summary = {
         "file_path": doc.image_path,
-        "summary": chat_completion["message"]["content"],
+        "summary": chat_completion.choices[0].message.content,
     }
 
     print(colored(summary["file_path"], "green"))  # Print the filename in green
@@ -176,21 +184,18 @@ async def summarize_image_document(doc: ImageDocument, client):
     return summary
 
 
-async def dispatch_summarize_document(doc, client):
+async def dispatch_summarize_document(doc, incognito=True):
     if isinstance(doc, ImageDocument):
-        return await summarize_image_document(doc, client)
+        return await summarize_image_document(doc)
     elif isinstance(doc, Document):
-        return await summarize_document({"content": doc.text, **doc.metadata}, client)
+        return await summarize_document({"content": doc.text, **doc.metadata}, incognito=incognito)
     else:
         raise ValueError("Document type not supported")
 
 
-async def get_summaries(documents):
-    client = AsyncGroq(
-        api_key=os.environ.get("GROQ_API_KEY"),
-    )
+async def get_summaries(documents, incognito=True):
     summaries = await asyncio.gather(
-        *[dispatch_summarize_document(doc, client) for doc in documents]
+        *[dispatch_summarize_document(doc, incognito=incognito) for doc in documents]
     )
     return summaries
@@ -219,88 +224,12 @@ def merge_summary_documents(summaries, metadata_list):
 ################################################################################################
 
 
-def get_file_summary(path: str):
-    client = Groq(
-        api_key=os.environ.get("GROQ_API_KEY"),
-    )
+def get_file_summary(path: str, incognito=True):
     reader = SimpleDirectoryReader(input_files=[path]).iter_data()
     docs = next(reader)
     splitter = TokenTextSplitter(chunk_size=6144)
     text = splitter.split_text("\n".join([d.text for d in docs]))[0]
     doc = Document(text=text, metadata=docs[0].metadata)
-    summary = dispatch_summarize_document_sync(doc, client)
-    return summary
-
-
-def dispatch_summarize_document_sync(doc, client):
-    if isinstance(doc, ImageDocument):
-        return summarize_image_document_sync(doc, client)
-    elif isinstance(doc, Document):
-        return summarize_document_sync({"content": doc.text, **doc.metadata}, client)
-    else:
-        raise ValueError("Document type not supported")
-
-
-def summarize_document_sync(doc, client):
-    PROMPT = """
-You will be provided with the contents of a file along with its metadata. Provide a summary of the contents. The purpose of the summary is to organize files based on their content. To this end provide a concise but informative summary. Make the summary as specific to the file as possible.
-
-Write your response a JSON object with the following schema:
-
-```json
-{
-    "file_path": "path to the file including name",
-    "summary": "summary of the content"
-}
-```
-""".strip()
-
-    chat_completion = client.chat.completions.create(
-        messages=[
-            {"role": "system", "content": PROMPT},
-            {"role": "user", "content": json.dumps(doc)},
-        ],
-        model="llama3-70b-8192",
-        response_format={"type": "json_object"},
-        temperature=0,
-    )
-    summary = json.loads(chat_completion.choices[0].message.content)
-
-    try:
-        print(colored(summary["file_path"], "green"))  # Print the filename in green
-        print(summary["summary"])  # Print the summary of the contents
-        print("-" * 80 + "\n")  # Print a separator line with spacing for readability
-    except KeyError as e:
-        print(e)
-        print(summary)
-
-    return summary
-
-
-def summarize_image_document_sync(doc: ImageDocument, client):
-    client = ollama.Client()
-    chat_completion = client.chat(
-        messages=[
-            {
-                "role": "user",
-                "content": "Summarize the contents of this image.",
-                "images": [doc.image_path],
-            },
-        ],
-        model="moondream",
-        # format="json",
-        # stream=True,
-        options={"num_predict": 128},
-    )
-
-    summary = {
-        "file_path": doc.image_path,
-        "summary": chat_completion["message"]["content"],
-    }
-
-    print(colored(summary["file_path"], "green"))  # Print the filename in green
-    print(summary["summary"])  # Print the summary of the contents
-    print("-" * 80 + "\n")  # Print a separator line with spacing for readability
-
+    summary = asyncio.run(dispatch_summarize_document(doc, incognito=incognito))
     return summary
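For reference, a minimal way to exercise the new incognito-aware loader from a script (the path is a placeholder; assumes Ollama is serving the configured models):

```python
import asyncio

from src.loader import get_dir_summaries

# incognito=True is the new default, so everything stays on the local Ollama instance.
summaries = asyncio.run(get_dir_summaries("/path/to/folder", incognito=True))
for s in summaries:
    print(s["file_path"], "->", s["summary"])
```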
diff --git a/src/select_model.py b/src/select_model.py
new file mode 100644
index 0000000..e23d19a
--- /dev/null
+++ b/src/select_model.py
@@ -0,0 +1,16 @@
+from os import environ
+from litellm import validate_environment
+import warnings
+
+def select_model(incognito=True):
+    model = "groq/llama3-70b-8192" if environ.get("GROQ_API_KEY") and incognito is False else environ.get("MODEL", "ollama/llama3")
+    litellm_validation = validate_environment(model)
+    if litellm_validation.get('keys_in_environment') is False:
+        raise EnvironmentError({
+            "errno": 1,
+            "strerror": f"missing environment variables for model {model}",
+            "missing_keys": ','.join(litellm_validation.get("missing_keys"))
+        })
+    if "ollama" not in model:
+        warnings.warn(f"Sending the contents of your files to {model}!")
+    return model
\ No newline at end of file
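A quick sketch of how this helper resolves the model under a few environments (values are illustrative; the behaviour is as implemented above):

```python
from src.select_model import select_model

# No GROQ_API_KEY and no MODEL set        -> "ollama/llama3" (local default)
# MODEL="ollama/tinyllama", no GROQ key   -> "ollama/tinyllama"
# GROQ_API_KEY set and incognito=False    -> "groq/llama3-70b-8192" (warns that file
#                                            contents leave the machine)
# A model whose API key is missing        -> raises EnvironmentError via litellm's
#                                            validate_environment()
print(select_model(incognito=True))
```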
diff --git a/src/tree_generator.py b/src/tree_generator.py
index 13a39d1..ebd9d60 100644
--- a/src/tree_generator.py
+++ b/src/tree_generator.py
@@ -1,7 +1,9 @@
-from groq import Groq
+import litellm
 import json
 import os
 
+from src.select_model import select_model
+
 FILE_PROMPT = """
 You will be provided with list of source files and a summary of their contents. For each file, propose a new path and filename, using a directory structure that optimally organizes the files using known conventions and best practices.
 Follow good naming conventions. Here are a few guidelines
@@ -24,19 +26,20 @@
     ]
 }
 ```
+
+Only return the JSON, no chit chat.
 """.strip()
 
 
-def create_file_tree(summaries: list):
-    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-    chat_completion = client.chat.completions.create(
+def create_file_tree(summaries: list, incognito=True):
+    chat_completion = litellm.completion(
         messages=[
             {"role": "system", "content": FILE_PROMPT},
             {"role": "user", "content": json.dumps(summaries)},
         ],
-        model="llama3-70b-8192",
-        response_format={"type": "json_object"},  # Uncomment if needed
+        model=select_model(incognito),
         temperature=0,
+        max_retries=5,
     )
 
     file_tree = json.loads(chat_completion.choices[0].message.content)["files"]
diff --git a/src/watch_utils.py b/src/watch_utils.py
index 8eaa99f..2ef57d8 100644
--- a/src/watch_utils.py
+++ b/src/watch_utils.py
@@ -3,26 +3,28 @@
 import os
 import time
 
-from groq import Groq
+import litellm
 from watchdog.events import FileSystemEvent, FileSystemEventHandler
 from watchdog.observers import Observer
 
+from src.select_model import select_model
 from src.loader import get_dir_summaries, get_file_summary
 
-os.environ["GROQ_API_KEY"] = "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"
-
+watch_incognito = False
 
 class Handler(FileSystemEventHandler):
-    def __init__(self, base_path, callback, queue):
+    def __init__(self, base_path, callback, queue, incognito):
         self.base_path = base_path
         self.callback = callback
         self.queue = queue
+        global watch_incognito
+        watch_incognito = incognito
         self.events = []
         print(f"Watching directory {base_path}")
 
-    async def set_summaries(self):
+    async def set_summaries(self, incognito=False):
         print(f"Getting summaries for {self.base_path}")
-        self.summaries = await get_dir_summaries(self.base_path)
+        global watch_incognito
+        watch_incognito = incognito
+        self.summaries = await get_dir_summaries(self.base_path, incognito=incognito)
         self.summaries_cache = {s["file_path"]: s for s in self.summaries}
 
     def update_summary(self, file_path):
@@ -31,7 +33,7 @@ def update_summary(self, file_path):
         if not os.path.exists(path):
             self.summaries_cache.pop(file_path)
             return
-        self.summaries_cache[file_path] = get_file_summary(path)
+        self.summaries_cache[file_path] = get_file_summary(path, watch_incognito)
         self.summaries = list(self.summaries_cache.values())
         self.queue.put(
             {
@@ -107,18 +109,18 @@ def create_file_tree(summaries, fs_events):
 ```
 
 Include the above items in your response exactly as is, along all other proposed changes.
+Only return the JSON, no chit chat.
 """.strip()
 
-    client = Groq()
-    cmpl = client.chat.completions.create(
+    cmpl = litellm.completion(
         messages=[
             {"content": FILE_PROMPT, "role": "system"},
             {"content": json.dumps(summaries), "role": "user"},
             {"content": WATCH_PROMPT, "role": "system"},
             {"content": json.dumps(fs_events), "role": "user"},
         ],
-        model="llama3-70b-8192",
-        response_format={"type": "json_object"},
+        model=select_model(watch_incognito),
         temperature=0,
+        max_retries=5,
     )
     return json.loads(cmpl.choices[0].message.content)["files"]
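For reference, a local run that exercises the new defaults might look like the following (mirrors the VS Code launch configuration above; the /batch route path and port 8000 are assumptions based on server.py and uvicorn's defaults):

```bash
# Serve locally through Ollama, as in .vscode/launch.json
OLLAMA_API_BASE="http://localhost:11434" MODEL="ollama/llama3" uvicorn server:app --reload

# Requests now default to incognito=true; pass false to route through Groq instead
curl -X POST "http://localhost:8000/batch" \
  -H "Content-Type: application/json" \
  -d '{"path": "/path/to/organize", "incognito": false}'
```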