# user_voice_collect.py
# Forked from Plachtaa/VITS-fast-fine-tuning.
import numpy as np
import torch
import torchaudio
import gradio as gr
import os
# Annotation lines, one per prompt. Each line is "|"-separated; its third
# field is the text to read aloud and may be wrapped in language tags such
# as [ZH]...[ZH].
with open("./user_voice/user_voice.txt", "r", encoding="utf-8") as f:
    anno_lines = [line.rstrip("\n") for line in f]

# A file that ends with extra blank lines would otherwise yield prompts that
# have no text field, making the last "Next" click (and the last dropdown
# entries) fail. Only *trailing* blanks are dropped, so the index of every
# remaining line is unchanged.
while anno_lines and not anno_lines[-1].strip():
    anno_lines.pop()

# Index of the prompt currently displayed; also used as the name of the WAV
# file written for that prompt.
text_index = 0
# Language markers that may wrap a prompt's text in the annotation file.
_LANGUAGE_TAGS = ("[ZH]", "[EN]", "[ES]")


def _strip_language_tags(text):
    """Remove whole language tags from both ends of *text*.

    ``str.strip("[EN]")`` treats its argument as a set of characters, so it
    also removes legitimate leading/trailing letters (for example
    "[EN]Hello BEN[EN]" would be cut down to "Hello B").  Here only complete
    tags are removed; tags in the middle of the text are left untouched.
    """
    changed = True
    while changed:
        changed = False
        for tag in _LANGUAGE_TAGS:
            if text.startswith(tag):
                text = text[len(tag):]
                changed = True
            if text.endswith(tag):
                text = text[:-len(tag)]
                changed = True
    return text


def _prompt_label(index):
    """Return the "<index>: <text>" label for annotation line *index*."""
    raw_text = anno_lines[index].split("|")[2]
    return f"{index}: " + _strip_language_tags(raw_text)


def display_text(index):
    """Select prompt *index* and return its label.

    Args:
        index: Position in ``anno_lines``; anything ``int()`` accepts
            (the dropdown supplies it as a string).

    Returns:
        The label "<index>: <text>" with surrounding language tags removed.

    Raises:
        IndexError: If *index* is out of range or the line has fewer than
            three "|"-separated fields.
    """
    global text_index
    index = int(index)
    # Build the label first so a bad index does not leave text_index
    # pointing at a line that cannot be displayed.
    label = _prompt_label(index)
    text_index = index
    return label


def display_prev_text():
    """Step back one prompt (staying on the first one) and return its label."""
    global text_index
    if text_index > 0:
        text_index -= 1
    return _prompt_label(text_index)


def display_next_text():
    """Step forward one prompt (staying on the last one) and return its label."""
    global text_index
    if text_index < len(anno_lines) - 1:
        text_index += 1
    return _prompt_label(text_index)
def save_audio(audio):
    """Peak-normalize a recording and save it as the current prompt's WAV file.

    The file is written to ``./user_voice/<text_index>.wav`` at 22050 Hz,
    resampling first when the recording uses a different rate.

    Args:
        audio: ``(sample_rate, samples)`` pair coming from the audio input,
            or a falsy value when nothing was recorded.

    Returns:
        A status message: the saved path on success, or an error asking the
        user to record audio.
    """
    if not audio:
        return "Error: Please record your audio!"

    target_sr = 22050
    sr, samples = audio

    # Convert to float *before* measuring the peak: on an int16 array,
    # negating a minimum of -32768 overflows and under-estimates the peak,
    # which lets normalized samples exceed 1.0.
    wav = torch.tensor(samples).type(torch.float32)
    if wav.numel() == 0:
        # An empty recording has no peak to normalize by.
        return "Error: Please record your audio!"

    peak = wav.abs().max()
    if peak > 0:
        # Pure silence is left as zeros instead of becoming NaN (0 / 0).
        wav = wav / peak

    if wav.dim() == 1:
        wav = wav.unsqueeze(0)
    elif wav.shape[0] > wav.shape[1]:
        # NOTE(review): assumes the shorter axis is the channel axis, i.e. a
        # (samples, channels) array is turned into the (channels, samples)
        # layout that channels_first=True expects - confirm against the
        # audio component in use.
        wav = wav.transpose(0, 1).contiguous()

    if sr != target_sr:
        wav = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)(wav)

    out_path = f"./user_voice/{text_index}.wav"
    torchaudio.save(out_path, wav, target_sr, channels_first=True)
    return f"Audio saved to {out_path} successfully!"
if __name__ == "__main__":
    # Without at least one prompt there is nothing to show or record.
    if not anno_lines:
        raise SystemExit("Error: ./user_voice/user_voice.txt contains no text to read!")

    app = gr.Blocks()
    with app:
        # Prompt the user has to read aloud.
        with gr.Row():
            # Reuse display_text so the first prompt is formatted exactly
            # like every later one (previously only [ZH] was stripped here,
            # so [EN]/[ES] prompts first appeared with their tags).
            text = gr.Textbox(value=display_text(0), label="Please read the text here")
        # Microphone recorder.
        with gr.Row():
            audio_to_collect = gr.Audio(source="microphone")
        # Step through the prompts one at a time.
        with gr.Row():
            with gr.Column():
                prev_btn = gr.Button(value="Previous")
            with gr.Column():
                next_btn = gr.Button(value="Next")
        # Jump directly to a prompt by index.
        with gr.Row():
            index_dropdown = gr.Dropdown(choices=[str(i) for i in range(len(anno_lines))], value="0",
                                         label="No. of text", interactive=True)
        # Save the recording and report the outcome.
        with gr.Row():
            with gr.Column():
                save_btn = gr.Button(value="Save Audio")
            with gr.Column():
                audio_save_message = gr.Textbox(label="Message")

        # Event wiring: every navigation control refreshes the prompt text.
        index_dropdown.change(display_text, inputs=index_dropdown, outputs=text)
        prev_btn.click(display_prev_text, inputs=None, outputs=text)
        next_btn.click(display_next_text, inputs=None, outputs=text)
        save_btn.click(save_audio, inputs=audio_to_collect, outputs=audio_save_message)
    app.launch()