Skip to content

Commit

Permalink
adding updated version of xtts v2 (#375)
Browse files: browse the repository at this point in the history
  • Loading branch information
htrivedi99 authored Nov 5, 2024
1 parent 7786b53 commit ce272e9
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 52 deletions.
8 changes: 4 additions & 4 deletions xtts-v2-truss/config.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
environment_variables:
COQUI_TOS_AGREED: '1'
COQUI_TOS_AGREED: "1"
external_package_dirs: []
model_metadata:
example_model_input:
language: en
speaker_voice: <BASE64-STRING>
text: I love robots. Robots are cool!
speaker_voice: Claribel Dervla
text: Kurt watched the incoming Pelicans. The blocky jet-powered craft were so distant they were only specks against the setting sun. He hit the magnification on his faceplate and saw lines of fire tracing their reentry vectors. They would touch down in three minutes.
tags:
- text-to-speech
model_name: XTTS V2
python_version: py310
requirements:
- git+https://github.com/coqui-ai/TTS.git@v0.21.3
- git+https://github.com/htrivedi99/TTS.git
resources:
accelerator: T4
cpu: '3'
Expand Down
65 changes: 17 additions & 48 deletions xtts-v2-truss/model/model.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import base64
import os
from tempfile import NamedTemporaryFile
from typing import Dict

from TTS.api import TTS

DEFAULT_SPEAKER_NAME = "Claribel Dervla"


class Model:
def __init__(self, **kwargs):
Expand All @@ -24,51 +26,18 @@ def wav_to_base64(self, file_path):
base64_string = base64_data.decode("utf-8")
return base64_string

def preprocess(self, request: Dict) -> Dict:
text = request.get("text")
speaker_voice = request.get("speaker_voice")
language = request.get("language")
supported_languages = {
"en",
"es",
"fr",
"de",
"it",
"pt",
"pl",
"tr",
"ru",
"nl",
"cs",
"ar",
"zh-cn",
}

if language not in supported_languages:
return {
"output": f"The language you chose is not supported. Please select from the following choices: {supported_languages}"
}

self.base64_to_wav(speaker_voice, "speaker_voice.wav")
return {
"text": text,
"speaker_voice": "speaker_voice.wav",
"language": language,
}

def predict(self, request: Dict) -> Dict:
text = request.pop("text")
speaker_voice = request.pop("speaker_voice")
language = request.pop("language")
self.model.tts_to_file(
text=text,
file_path="output.wav",
speaker_wav=speaker_voice,
language=language,
)

base64_string = self.wav_to_base64("output.wav")

os.remove("speaker_voice.wav")
os.remove("output.wav")
return {"output": base64_string}
text = request.get("text")
speaker_voice = request.get("speaker_voice", DEFAULT_SPEAKER_NAME)
language = request.get("language", "en")

with NamedTemporaryFile(delete=True) as fp:
self.model.tts_to_file(
text=text,
file_path=fp.name,
speaker=speaker_voice,
language=language,
)

base64_string = self.wav_to_base64(fp.name)
return {"output": base64_string}

0 comments on commit ce272e9

Please sign in to comment.