Skip to content

Commit

Permalink
Merge pull request #9 from MadMaximusJB/image_gen
Browse files Browse the repository at this point in the history
Added image generation functionality
  • Loading branch information
MadMaximusJB authored Jun 26, 2024
2 parents b12476b + b314e68 commit 34e2cde
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 3 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# quorra

This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-Speech for advanced voice and text interaction. It features long-term memory by vectorizing conversations and recalling relevant past interactions.
This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-Speech for advanced voice and text interaction. It features internet search and image generation capabilities using a combination of OpenAI and Google services.

### Features

Expand Down Expand Up @@ -54,7 +54,7 @@ This project is a custom voice assistant using GPT-4o and Google Cloud Text-to-S
DEFAULT_VOICE_NAME = 'en-US-Wavenet-D' # Replace with your desired voice
DEFAULT_AUDIO_ENCODING = 'LINEAR16' # Use LINEAR16 for streaming audio
GOOGLE_CSE_API_KEY = 'YOUR_CSE_API_KEY'
GOOGLE_CSE_ID = 'YOUR_CSE_ID
GOOGLE_CSE_ID = 'YOUR_CSE_ID'
```

### Usage
Expand Down
7 changes: 7 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from utils.text_processing import process_text_with_gpt4o, summarize_text_with_gpt4o
from utils.text_to_speech import text_to_speech_with_google
from utils.web_search import search_web
from utils.image_generation import generate_image, save_and_open_image
from config import DEFAULT_VOICE_NAME
import keyboard

Expand All @@ -19,6 +20,12 @@ def main(voice_name=DEFAULT_VOICE_NAME):
search_results = search_web(search_query)
summarized_results = summarize_text_with_gpt4o(search_results)
response_text = summarized_results
elif "generate image with prompt" in user_input.lower():
image_prompt = user_input.lower().replace("generate image with prompt", "").strip()
image = generate_image(image_prompt)
if image:
save_and_open_image(image)
response_text = f"Image generation complete for prompt: {image_prompt}"
else:
response_text = process_text_with_gpt4o(user_input, conversation_history)

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ google-cloud-texttospeech
SpeechRecognition==3.8.1
keyboard==0.13.5
setuptools
requests
requests
Pillow
41 changes: 41 additions & 0 deletions utils/image_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from openai import OpenAI
import requests
from PIL import Image
from io import BytesIO
import os
import platform
import subprocess
from config import OPENAI_API_KEY

def generate_image(prompt):
    """Generate a 1024x1024 image with DALL-E 3 and return it as a PIL image.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The downloaded image opened with ``PIL.Image.open``, or ``None`` if
        the prompt is empty or any step (API call, download, decoding) fails.
        Failures are reported with ``print`` rather than raised.
    """
    try:
        # Reject blank prompts up front instead of spending an API call on them.
        if not prompt or not prompt.strip():
            print("Error generating image: prompt is empty")
            return None

        # Set the API key
        client = OpenAI(api_key=OPENAI_API_KEY)

        response = client.images.generate(
            prompt=prompt,
            n=1,
            size="1024x1024",
            model="dall-e-3",
        )

        # Access the URL from the response object
        image_url = response.data[0].url

        # Bound the download so a stalled connection cannot hang the caller,
        # and fail loudly on HTTP errors instead of handing an error body to PIL.
        image_response = requests.get(image_url, timeout=60)
        image_response.raise_for_status()

        return Image.open(BytesIO(image_response.content))
    except Exception as e:
        # Best-effort contract: callers check for None rather than catching.
        print(f"Error generating image: {e}")
        return None

def save_and_open_image(image, file_path='generated_image.png'):
    """Save an image to disk and open it with the platform's default viewer.

    Args:
        image: Object exposing ``save(path)`` (e.g. a PIL image).
        file_path: Destination path for the saved image.

    Returns:
        None. Errors while saving or launching the viewer are printed,
        not raised.
    """
    try:
        image.save(file_path)

        system_name = platform.system()
        if system_name == "Windows":
            os.startfile(file_path)
        elif system_name == "Linux":
            subprocess.call(["xdg-open", file_path])
        elif system_name == "Darwin":
            # macOS counterpart of xdg-open.
            subprocess.call(["open", file_path])
        else:
            print(f"Opening images is not supported on this platform: {system_name}")
    except Exception as e:
        print(f"Error saving or opening image: {e}")

0 comments on commit 34e2cde

Please sign in to comment.