Commit 1a2452f (1 parent: 2eb0751)
Showing 5 changed files with 288 additions and 0 deletions.
@@ -0,0 +1,6 @@
ELEVEN_LABS_API_KEY="<your_api_key>" # https://elevenlabs.io/speech-synthesis
OPENAI_API_KEY="your_api_key"
DISCORD_TOKEN="your_discord_token"
BOT_ID='your_bot_id'
API_BASE="open_ai_api_base"
SYSTEM_MESSAGE="system_message"
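
For reference, a minimal sketch of how these values are expected to be read at runtime (assuming the python-dotenv package, which the code further below already uses):

# Illustrative sketch only: mirrors the dotenv / os.getenv pattern used by the bot code below
import os
import dotenv

dotenv.load_dotenv(".env")                          # load key=value pairs from .env
discord_token = os.getenv("DISCORD_TOKEN")          # Discord bot token
bot_id = os.getenv("BOT_ID")                        # bot user ID, used to detect mentions
eleven_labs_key = os.getenv("ELEVEN_LABS_API_KEY")  # ElevenLabs key for speech synthesis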
@@ -0,0 +1,22 @@
# Use an official Python runtime as a parent image
FROM python:3.10

# Set the working directory in the container to /app
WORKDIR /app

# Add the current directory contents into the container at /app
ADD . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Clone the Pycord-Development repository and install it
RUN git clone https://github.com/Pycord-Development/pycord && \
    cd pycord && \
    pip install -U .

# Make port 80 available to the world outside this container
EXPOSE 80

# Run main.py when the container launches
CMD ["python", "main.py"]
@@ -0,0 +1,10 @@
version: '3'
services:
  your-service:
    build:
      context: .
      dockerfile: Dockerfile
    env_file:
      - .env
    ports:
      - "80:80"
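
Assuming Docker with the Compose plugin is installed, this stack can presumably be built and started with "docker compose up --build" (or "docker-compose up --build" with the standalone binary); the env_file entry injects the variables from .env into the container at runtime.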
@@ -0,0 +1,121 @@
import gradio_client as grc
import interpreter
import time
import gradio as gr
from pydub import AudioSegment
import io
from elevenlabs import generate, play, set_api_key
import whisper
import dotenv
import os

dotenv.load_dotenv(".env")

# interpreter.model = "TheBloke/Mistral-7B-OpenOrca-GGUF"
interpreter.auto_run = True
model = whisper.load_model("base")


def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

set_api_key(os.getenv("ELEVEN_LABS_API_KEY"))


def get_audio_length(audio_bytes):
    # Create a BytesIO object from the byte array
    byte_io = io.BytesIO(audio_bytes)

    # Load the audio data with PyDub
    audio = AudioSegment.from_mp3(byte_io)

    # Get the length of the audio in milliseconds
    length_ms = len(audio)

    # Optionally convert to seconds
    length_s = length_ms / 1000.0

    return length_s


def speak(text):
    speaking = True
    audio = generate(
        text=text,
        voice="Daniel"
    )
    play(audio, notebook=True)

    audio_length = get_audio_length(audio)
    time.sleep(audio_length)

# @title Text-only JARVIS
# @markdown Run this cell for a ChatGPT-like interface.


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()

    def user(user_message, history):
        return "", history + [[user_message, None]]

    def bot(history):
        user_message = history[-1][0]
        history[-1][1] = ""
        active_block_type = ""

        for chunk in interpreter.chat(user_message, stream=True, display=False):
            # Message
            if "message" in chunk:
                if active_block_type != "message":
                    active_block_type = "message"
                history[-1][1] += chunk["message"]
                yield history

            # Code
            if "language" in chunk:
                language = chunk["language"]
            if "code" in chunk:
                if active_block_type != "code":
                    active_block_type = "code"
                    history[-1][1] += f"\n```{language}\n"
                history[-1][1] += chunk["code"]
                yield history

            # Output
            if "executing" in chunk:
                history[-1][1] += "\n```\n\n```text\n"
                yield history
            if "output" in chunk:
                if chunk["output"] != "KeyboardInterrupt":
                    history[-1][1] += chunk["output"] + "\n"
                yield history
            if "end_of_execution" in chunk:
                history[-1][1] = history[-1][1].strip()
                history[-1][1] += "\n```\n"
                yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

if __name__ == '__main__':
    demo.queue()
    demo.launch(debug=True)
@@ -0,0 +1,129 @@
import os
import discord
from discord.ext import commands
import interpreter
import dotenv
import whisper
from jarvis import transcribe

dotenv.load_dotenv(".env")

bot_token = os.getenv("DISCORD_TOKEN")
bot_id = os.getenv("BOT_ID")

# interpreter.api_key = os.getenv("API_KEY")
# interpreter.api_base = os.getenv("API_BASE")
# interpreter.auto_run = True


def split_text(text, chunk_size=1500):
    # Split text into chunks of at most chunk_size characters
    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]


# Discord initialization
intents = discord.Intents.all()
intents.message_content = True
client = commands.Bot(command_prefix="$", intents=intents)

message_chunks = []
send_image = False


@client.event
async def on_message(message):
    await client.process_commands(message)
    bot_mention = f'<@{bot_id}>'
    if (bot_mention not in message.content) or (message.author == client.user or message.content[0] == '$'):
        return
    response = []
    for chunk in interpreter.chat(message.content, display=False, stream=False):
        # await message.channel.send(chunk)
        if 'message' in chunk:
            response.append(chunk['message'])
    last_response = response[-1]

    max_message_length = 2000  # Discord's max message length is 2000 characters
    # Splitting the message into chunks of 2000 characters
    response_chunks = [last_response[i:i + max_message_length]
                       for i in range(0, len(last_response), max_message_length)]
    # Sending each chunk as a separate message
    for chunk in response_chunks:
        await message.channel.send(chunk)


@client.command()
async def join(ctx):
    if ctx.author.voice:
        channel = ctx.message.author.voice.channel
        print('joining..')
        await channel.connect()
        print('joined.')
    else:
        print("not in a voice channel!")


@client.command()
async def leave(ctx):
    if ctx.voice_client:
        await ctx.voice_client.disconnect()
    else:
        print("not in a voice channel!")


@client.command()
async def listen(ctx):
    if ctx.voice_client:
        print('trying to listen..')
        ctx.voice_client.start_recording(discord.sinks.WaveSink(), callback, ctx)
        print('listening..')
    else:
        print("not in a voice channel!")

# Whisper model used by the local transcribe() below (this definition shadows
# the transcribe imported from jarvis, so it needs its own model instance)
model = whisper.load_model("base")


def transcribe(audio):
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

async def callback(sink: discord.sinks.Sink, ctx):
    print('in callback..')
    for user_id, audio in sink.audio_data.items():
        if user_id == ctx.author.id:
            print('saving audio..')
            audio: discord.sinks.core.AudioData = audio
            print(user_id)
            filename = "audio.wav"
            with open(filename, "wb") as f:
                f.write(audio.file.getvalue())
            print('audio saved.')
            transcription = transcribe(filename)
            print(transcription)
            response = []
            for chunk in interpreter.chat(transcription, display=False, stream=True):
                # await message.channel.send(chunk)
                if 'message' in chunk:
                    response.append(chunk['message'])
            await ctx.message.channel.send(' '.join(response))


@client.command()
async def stop(ctx):
    ctx.voice_client.stop_recording()


@client.event
async def on_ready():
    print(f"We have logged in as {client.user}")


client.run(bot_token)
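
Taken together: the command prefix is "$", so $join connects the bot to the caller's voice channel, $listen starts recording through Pycord's WaveSink, and $stop stops the recording and hands the captured audio to the transcription callback; mentioning the bot in a text channel routes the message through interpreter.chat and sends the reply back in chunks of at most 2000 characters.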