-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgc_texttospeech.py
60 lines (50 loc) · 2.35 KB
/
gc_texttospeech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from google.cloud import texttospeech
# Create the Text-to-Speech API client; the synthesis loop below sends every
# request through this single instance.
# NOTE(review): presumably relies on Google Cloud credentials being available
# in the environment (e.g. Application Default Credentials) — confirm.
client = texttospeech.TextToSpeechClient()
# Scripted two-party dialogue to synthesize. Every line has the form
# "<speaker>: <utterance>"; the speaker label picks the voice used for it.
# NOTE: the e-mail address below is a reserved example.com placeholder. The
# text previously contained the scrubbing artifact "[email protected]", which
# would have been read aloud literally.
input_text = """
Virtual agent: Hello! How may I assist you today?
Caller: I'd like to open a new savings account.
Virtual agent: Certainly! Before we proceed, may I have your full name, please?
Caller: Mary Johnson.
Virtual agent: Thank you, Mary. Can you also provide a valid phone number?
Caller: Sure, it's 555-123-4567.
Virtual agent: Great. To proceed with the account opening, we'll need some additional information. Could you please confirm your date of birth?
Caller: December 3, 1990.
Virtual agent: Perfect. Now, for security purposes, could you provide your mother's maiden name?
Caller: Smith.
Virtual agent: Thank you, Mary. I have all the necessary details. A new savings account will be created for you shortly, and the account information will be sent to mary.johnson@example.com. Is there anything else you'd like to inquire about?
Caller: No, that's it. Thank you!
Virtual agent: You're welcome! If you have any further questions, feel free to reach out. Have a wonderful day!
"""
# Voice configuration, keyed first by language code and then by speaker role.
# "name" is the Cloud Text-to-Speech voice to use; "gender" is the name of an
# SSML voice gender (e.g. "MALE", "FEMALE") and has to agree with the gender
# of the named voice.
voice_params = {
    "en-US": {
        "virtual_agent": {"name": "en-US-Wavenet-D", "gender": "MALE"},
        # en-US-Wavenet-B is listed as a male voice, so pairing it with
        # FEMALE is a mismatch the API is expected to reject;
        # en-US-Wavenet-C is a female WaveNet voice.
        "caller": {"name": "en-US-Wavenet-C", "gender": "FEMALE"},
    }
}
# Split the script into one "<speaker>: <utterance>" entry per line.
parts = input_text.strip().split("\n")

# The output encoding is identical for every utterance, so build it once
# instead of on each loop iteration.
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.LINEAR16
)

# Synthesize each utterance with its speaker's voice and write it to its own
# file, numbered by the line's position in the script.
audio_files = []
for i, part in enumerate(parts):
    # Skip blank or malformed lines rather than failing on the unpack below.
    if ":" not in part:
        continue
    role, text = part.split(":", 1)

    # Speaker labels such as "Virtual agent" contain a space, while the
    # voice_params keys use underscores ("virtual_agent"); normalize the
    # label so the lookup matches instead of raising KeyError.
    role = "_".join(role.lower().split())
    text = text.strip()
    if not text:
        # Nothing to say for this line; do not send an empty request.
        continue

    # Determine the voice based on the role.
    try:
        role_voice = voice_params["en-US"][role]
    except KeyError:
        raise KeyError(f"No voice configured for speaker {role!r}") from None
    voice_name = role_voice["name"]
    # The table stores the gender by name; resolve it to the enum member so
    # a typo fails here rather than inside the request.
    voice_gender = texttospeech.SsmlVoiceGender[role_voice["gender"]]

    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US", name=voice_name, ssml_gender=voice_gender
    )

    # Synthesize speech for this utterance.
    synthesis_input = texttospeech.SynthesisInput(text=text)
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # Save the returned audio bytes to a file.
    output_file = f"output_{i}.wav"
    with open(output_file, "wb") as f:
        f.write(response.audio_content)
    audio_files.append(output_file)

print("Audio files generated:", audio_files)