Merge pull request #9 from MadMaximusJB/image_gen

Added image generation functionality
MadMaximusJB · Jun 26, 2024 · 34e2cde · 34e2cde
2 parents b12476b + b314e68
commit 34e2cde
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # quorra
 
-This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-Speech for advanced voice and text interaction. It features long-term memory by vectorizing conversations and recalling relevant past interactions.
+This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-Speech for advanced voice and text interaction. It features internet search and image generation capabilities using a combination of OpenAI and Google services.
 
 ### Features
 
@@ -54,7 +54,7 @@ This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-S
     DEFAULT_VOICE_NAME = 'en-US-Wavenet-D'  # Replace with your desired voice
     DEFAULT_AUDIO_ENCODING = 'LINEAR16'  # Use LINEAR16 for streaming audio
     GOOGLE_CSE_API_KEY = 'YOUR_CSE_API_KEY'
-    GOOGLE_CSE_ID = 'YOUR_CSE_ID
+    GOOGLE_CSE_ID = 'YOUR_CSE_ID'
     ```
 
 ### Usage

diff --git a/main.py b/main.py
@@ -2,6 +2,7 @@
 from utils.text_processing import process_text_with_gpt4o, summarize_text_with_gpt4o
 from utils.text_to_speech import text_to_speech_with_google
 from utils.web_search import search_web
+from utils.image_generation import generate_image, save_and_open_image
 from config import DEFAULT_VOICE_NAME
 import keyboard
 
@@ -19,6 +20,12 @@ def main(voice_name=DEFAULT_VOICE_NAME):
                 search_results = search_web(search_query)
                 summarized_results = summarize_text_with_gpt4o(search_results)
                 response_text = summarized_results
+            elif "generate image with prompt" in user_input.lower():
+                    image_prompt = user_input.lower().replace("generate image with prompt", "").strip()
+                    image = generate_image(image_prompt)
+                    if image:
+                        save_and_open_image(image)
+                    response_text = f"Image generation complete for prompt: {image_prompt}"
             else:
                 response_text = process_text_with_gpt4o(user_input, conversation_history)
 

diff --git a/requirements.txt b/requirements.txt
@@ -3,4 +3,5 @@ google-cloud-texttospeech
 SpeechRecognition==3.8.1
 keyboard==0.13.5
 setuptools
-requests
+requests
+Pillow
diff --git a/utils/image_generation.py b/utils/image_generation.py
@@ -0,0 +1,41 @@
+from openai import OpenAI
+import requests
+from PIL import Image
+from io import BytesIO
+import os
+import platform
+import subprocess
+from config import OPENAI_API_KEY
+
+def generate_image(prompt):
+    """Generate one 1024x1024 image with DALL-E 3 and return it as a PIL image.
+
+    Args:
+        prompt: Text description of the image to generate.
+
+    Returns:
+        The decoded ``PIL.Image.Image``, or ``None`` when the prompt is empty
+        or any step (API call, download, decoding) fails. Failures are
+        printed rather than raised.
+    """
+    # An empty prompt can only yield an API error; skip the request entirely.
+    if not prompt or not str(prompt).strip():
+        print("Error generating image: prompt is empty")
+        return None
+    try:
+        # Set the API key
+        client = OpenAI(api_key=OPENAI_API_KEY)
+
+        response = client.images.generate(
+            prompt=prompt,
+            n=1,
+            size="1024x1024",
+            model="dall-e-3",
+        )
+
+        # Access the URL from the response object
+        image_url = response.data[0].url
+        # Bound the download so a stalled connection cannot hang the caller.
+        image_response = requests.get(image_url, timeout=30)
+        # Surface HTTP errors here instead of handing an error page to PIL.
+        image_response.raise_for_status()
+        image = Image.open(BytesIO(image_response.content))
+        # Image.open is lazy; decode now so corrupt data is reported here
+        # rather than later when the caller tries to save the image.
+        image.load()
+        return image
+    except Exception as e:
+        print(f"Error generating image: {e}")
+        return None
+
+def save_and_open_image(image, file_path='generated_image.png'):
+    """Save ``image`` to ``file_path`` and open it in the default viewer.
+
+    Args:
+        image: Object exposing ``save(path)``, e.g. a PIL image.
+        file_path: Destination path for the saved image.
+
+    Errors while saving or launching the viewer are printed, not raised.
+    Platforms other than Windows, Linux and macOS only get the file saved,
+    plus a printed notice that opening is unsupported.
+    """
+    try:
+        image.save(file_path)
+        # Query the platform once and reuse the answer in every branch.
+        system = platform.system()
+        if system == "Windows":
+            os.startfile(file_path)
+        elif system == "Linux":
+            subprocess.call(["xdg-open", file_path])
+        elif system == "Darwin":
+            # macOS provides the `open` command to launch the default app.
+            subprocess.call(["open", file_path])
+        else:
+            print(f"Opening images is not supported on this platform: {system}")
+    except Exception as e:
+        print(f"Error saving or opening image: {e}")