-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnarrator.py
122 lines (97 loc) · 3.38 KB
/
narrator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
from openai import OpenAI
import base64
import json
import time
import simpleaudio as sa
import errno
from elevenlabs import generate, play, set_api_key, voices
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
# Get API keys from environment variables
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
narrator_api_key = os.getenv("NARRATOR_API_KEY")
replicate_api_key = os.getenv("REPLICATE_API_KEY")
elevenlabs_voice_id = os.getenv("ELEVENLABS_VOICE_ID")
# Check if required API keys are set
if not elevenlabs_api_key:
raise ValueError("ELEVENLABS_API_KEY is not set in the .env file.")
if not openai_api_key:
raise ValueError("OPENAI_API_KEY is not set in the .env file.")
# Set API keys
set_api_key(elevenlabs_api_key)
os.environ["OPENAI_API_KEY"] = openai_api_key
client = OpenAI()
def encode_image(image_path):
while True:
try:
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
except IOError as e:
if e.errno != errno.EACCES:
# Not a "file in use" error, re-raise
raise
# File is being written to, wait a bit and retry
time.sleep(0.1)
def play_audio(text):
audio = generate(text, voice=os.environ.get("ELEVENLABS_VOICE_ID"))
unique_id = base64.urlsafe_b64encode(os.urandom(30)).decode("utf-8").rstrip("=")
dir_path = os.path.join("narration", unique_id)
os.makedirs(dir_path, exist_ok=True)
file_path = os.path.join(dir_path, "audio.wav")
with open(file_path, "wb") as f:
f.write(audio)
play(audio)
def generate_new_line(base64_image):
return [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
},
},
],
},
]
def analyze_image(base64_image, script):
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{
"role": "system",
"content": """
You are Sir David Attenborough. Narrate the picture of the human as if it is a nature documentary.
Make it snarky and funny. Don't repeat yourself. Make it short. If I do anything remotely interesting, make a big deal about it!
""",
},
]
+ script
+ generate_new_line(base64_image),
max_tokens=500,
)
response_text = response.choices[0].message.content
return response_text
def main():
script = []
while True:
# path to your image
image_path = os.path.join(os.getcwd(), "./frames/frame.jpg")
# getting the base64 encoding
base64_image = encode_image(image_path)
# analyze posture
print("👀 David is watching...")
analysis = analyze_image(base64_image, script=script)
print("🎙️ David says:")
print(analysis)
play_audio(analysis)
script = script + [{"role": "assistant", "content": analysis}]
# wait for 5 seconds
time.sleep(5)
if __name__ == "__main__":
main()