Commit 1a2452f (1 parent: 2eb0751)
Showing 5 changed files with 288 additions and 0 deletions.
@@ -0,0 +1,6 @@
ELEVEN_LABS_API_KEY="<your_api_key>" # https://elevenlabs.io/speech-synthesis
OPENAI_API_KEY="your_api_key"
DISCORD_TOKEN="your_discord_token"
BOT_ID='your_bot_id'
API_BASE="open_ai_api_base"
SYSTEM_MESSAGE="system_message"
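
For reference, a minimal sketch of how these values are expected to be read at runtime (assuming the python-dotenv package, which the code further below already uses):

# Illustrative sketch only: mirrors the dotenv / os.getenv pattern used by the bot code below
import os
import dotenv

dotenv.load_dotenv(".env")                          # load key=value pairs from .env
discord_token = os.getenv("DISCORD_TOKEN")          # Discord bot token
bot_id = os.getenv("BOT_ID")                        # bot user ID, used to detect mentions
eleven_labs_key = os.getenv("ELEVEN_LABS_API_KEY")  # ElevenLabs key for speech synthesis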
@@ -0,0 +1,22 @@
# Use an official Python runtime as a parent image
FROM python:3.10

# Set the working directory in the container to /app
WORKDIR /app

# Add the current directory contents into the container at /app
ADD . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Clone the Pycord-Development repository and install it
RUN git clone https://github.com/Pycord-Development/pycord && \
    cd pycord && \
    pip install -U .

# Make port 80 available to the world outside this container
EXPOSE 80

# Run main.py when the container launches
CMD ["python", "main.py"]
@@ -0,0 +1,10 @@
version: '3'
services:
  your-service:
    build:
      context: .
      dockerfile: Dockerfile
    env_file:
      - .env
    ports:
      - "80:80"
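
Assuming Docker with the Compose plugin is installed, this stack can presumably be built and started with "docker compose up --build" (or "docker-compose up --build" with the standalone binary); the env_file entry injects the variables from .env into the container at runtime.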
@@ -0,0 +1,121 @@
import gradio_client as grc
import interpreter
import time
import gradio as gr
from pydub import AudioSegment
import io
from elevenlabs import generate, play, set_api_key
import whisper
import dotenv
import os

dotenv.load_dotenv(".env")

# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
interpreter.auto_run = True
model = whisper.load_model("base")


def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))


def get_audio_length(audio_bytes):
    # Create a BytesIO object from the byte array
    byte_io = io.BytesIO(audio_bytes)

    # Load the audio data with PyDub
    audio = AudioSegment.from_mp3(byte_io)

    # Get the length of the audio in milliseconds
    length_ms = len(audio)

    # Optionally convert to seconds
    length_s = length_ms / 1000.0

    return length_s


def speak(text):
    speaking = True
    audio = generate(
        text=text,
        voice="Daniel"
    )
    play(audio, notebook=True)

    audio_length = get_audio_length(audio)
    time.sleep(audio_length)

# @title Text-only JARVIS
# @markdown Run this cell for a ChatGPT-like interface.


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]
        history[-1][1] = ""
        active_block_type = ""

        for chunk in interpreter.chat(user_message, stream=True, display=False):
            # Message
            if "message" in chunk:
                if active_block_type != "message":
                    active_block_type = "message"
                history[-1][1] += chunk["message"]
                yield history

            # Code
            if "language" in chunk:
                language = chunk["language"]
            if "code" in chunk:
                if active_block_type != "code":
                    active_block_type = "code"
                    history[-1][1] += f"\n```{language}\n"
                history[-1][1] += chunk["code"]
                yield history

            # Output
            if "executing" in chunk:
                history[-1][1] += "\n```\n\n```text\n"
                yield history
            if "output" in chunk:
                if chunk["output"] != "KeyboardInterrupt":
                    history[-1][1] += chunk["output"] + "\n"
                yield history
            if "end_of_execution" in chunk:
                history[-1][1] = history[-1][1].strip()
                history[-1][1] += "\n```\n"
                yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

if __name__ == '__main__':
    demo.queue()
    demo.launch(debug=True)
@@ -0,0 +1,129 @@
import os
import discord
from discord.ext import commands
import interpreter
import dotenv
import whisper
from jarvis import transcribe

dotenv.load_dotenv(".env")

bot_token = os.getenv("DISCORD_TOKEN")
bot_id = os.getenv("BOT_ID")

# interpreter.api_key = os.getenv("API_KEY")
# interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True


def split_text(text, chunk_size=1500):
    # Split text into chunks of at most chunk_size characters
    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]


# Discord initialization
intents = discord.Intents.all()
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

message_chunks = []
send_image = False


@client.event
async def on_message(message):
    await client.process_commands(message)
    bot_mention = f'<@{bot_id}>'
    if (bot_mention not in message.content) or (message.author == client.user or message.content[0] == '$'):
        return
    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        # await message.channel.send(chunk)
        if 'message' in chunk:
            response.append(chunk['message'])
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters
    # Splitting the message into chunks of 2000 characters
    response_chunks = [last_response[i:i + max_message_length]
                       for i in range(0, len(last_response), max_message_length)]
    # Sending each chunk as a separate message
    for chunk in response_chunks:
        await message.channel.send(chunk)


@client.command()
async def join(ctx):
    if ctx.author.voice:
        channel = ctx.message.author.voice.channel
        print('joining..')
        await channel.connect()
        print('joined.')
    else:
        print("not in a voice channel!")


@client.command()
async def leave(ctx):
    if ctx.voice_client:
        await ctx.voice_client.disconnect()
    else:
        print("not in a voice channel!")


@client.command()
async def listen(ctx):
    if ctx.voice_client:
        print('trying to listen..')
        ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
        print('listening..')
    else:
        print("not in a voice channel!")

# Whisper model used by the local transcribe() below (this definition shadows
# the transcribe imported from jarvis, so it needs its own model instance)
model = whisper.load_model("base")


def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

async def callback(sink: discord.sinks.Sink, ctx):
    print('in callback..')
    for user_id, audio in sink.audio_data.items():
        if user_id == ctx.author.id:
            print('saving audio..')
            audio: discord.sinks.core.AudioData = audio
            print(user_id)
            filename = "audio.wav"
            with open(filename, "wb") as f:
                f.write(audio.file.getvalue())
            print('audio saved.')
            transcription = transcribe(filename)
            print(transcription)
            response = []
            for chunk in interpreter.chat(transcription, display=False, stream=True):
                # await message.channel.send(chunk)
                if 'message' in chunk:
                    response.append(chunk['message'])
            await ctx.message.channel.send(' '.join(response))


@client.command()
async def stop(ctx):
    ctx.voice_client.stop_recording()


@client.event
async def on_ready():
    print(f"We have logged in as {client.user}")


client.run(bot_token)
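
Taken together: the command prefix is "$", so $join connects the bot to the caller's voice channel, $listen starts recording through Pycord's WaveSink, and $stop stops the recording and hands the captured audio to the transcription callback; mentioning the bot in a text channel routes the message through interpreter.chat and sends the reply back in chunks of at most 2000 characters.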