From f903063f138f3d7141eb4fc39c1f24a3a521f072 Mon Sep 17 00:00:00 2001
From: David vonThenen <12752197+dvonthenen@users.noreply.github.com>
Date: Tue, 6 Aug 2024 07:12:29 -0700
Subject: [PATCH] Fix HTTPX Streaming

---
 deepgram/clients/abstract_async_client.py |   1 -
 deepgram/clients/abstract_sync_client.py  |   6 +-
 examples/requirements-examples.txt        |   4 +-
 .../rest/raw/hello_world_play/main.py      | 103 ++++++++++++++++++
 4 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 examples/text-to-speech/rest/raw/hello_world_play/main.py

diff --git a/deepgram/clients/abstract_async_client.py b/deepgram/clients/abstract_async_client.py
index 03b86a74..1ff53ba9 100644
--- a/deepgram/clients/abstract_async_client.py
+++ b/deepgram/clients/abstract_async_client.py
@@ -348,7 +348,6 @@ async def _handle_request_raw(
             client = httpx.AsyncClient(timeout=timeout, transport=transport)
             if transport:
                 kwargs.pop("transport")
-                kwargs.pop("transport")
             req = client.build_request(method, _url, headers=_headers, **kwargs)
             return await client.send(req, stream=True)
 
diff --git a/deepgram/clients/abstract_sync_client.py b/deepgram/clients/abstract_sync_client.py
index 6e9d2976..bff6e27c 100644
--- a/deepgram/clients/abstract_sync_client.py
+++ b/deepgram/clients/abstract_sync_client.py
@@ -337,9 +337,9 @@ def _handle_request_raw(
 
         try:
             transport = kwargs.get("transport")
-            with httpx.Client(timeout=timeout, transport=transport) as client:
-                if transport:
-                    kwargs.pop("transport")
+            client = httpx.Client(timeout=timeout, transport=transport)
+            if transport:
+                kwargs.pop("transport")
             req = client.build_request(method, _url, headers=_headers, **kwargs)
             return client.send(req, stream=True)
 
diff --git a/examples/requirements-examples.txt b/examples/requirements-examples.txt
index ee220abd..9ce82b93 100644
--- a/examples/requirements-examples.txt
+++ b/examples/requirements-examples.txt
@@ -4,4 +4,6 @@
 python-dotenv
 
 # streaming libs
-pyaudio
\ No newline at end of file
+pyaudio
+sounddevice==0.4.7
+numpy==2.0.1
\ No newline at end of file
diff --git a/examples/text-to-speech/rest/raw/hello_world_play/main.py b/examples/text-to-speech/rest/raw/hello_world_play/main.py
new file mode 100644
index 00000000..3e84e9eb
--- /dev/null
+++ b/examples/text-to-speech/rest/raw/hello_world_play/main.py
@@ -0,0 +1,103 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import sounddevice as sd
+import numpy as np
+import queue
+import threading
+
+from deepgram import (
+    DeepgramClient,
+    SpeakOptions,
+)
+
+SPEAK_TEXT = {"text": "Hello world!"}
+
+
+# Define a queue to manage audio data
+audio_queue = queue.Queue(maxsize=20)  # Adjust size as needed
+
+element_size = np.dtype(np.int16).itemsize  # Element size for np.int16 (16-bit integer)
+CHUNK_SIZE = 32768  # Desired size of each audio chunk in bytes
+
+
+def fetch_audio(response):
+    try:
+        buffer = bytearray()  # Buffer to accumulate data
+        for data in response.iter_bytes():
+            buffer.extend(data)  # Add incoming data to buffer
+            while len(buffer) >= CHUNK_SIZE:
+                # Extract a chunk of the desired size
+                chunk = buffer[:CHUNK_SIZE]
+                buffer = buffer[CHUNK_SIZE:]  # Remove the chunk from the buffer
+
+                # Ensure the chunk is aligned to the element size
+                buffer_size = len(chunk) - (len(chunk) % element_size)
+
+                if buffer_size > 0:
+                    audio_data = np.frombuffer(chunk[:buffer_size], dtype=np.int16)
+                    audio_queue.put(audio_data)
+                    print(
+                        f"Queued audio data of size: {audio_data.size * element_size} bytes"
+                    )
+
+        # Process any remaining data in the buffer
+        if buffer:
+            audio_data = np.frombuffer(buffer, dtype=np.int16)
+            audio_queue.put(audio_data)
+            print(
+                f"Queued remaining audio data of size: {audio_data.size * element_size} bytes"
+            )
+
+        # Signal the end of the stream
+        audio_queue.put(None)
+        print("End of audio stream.")
+    except Exception as e:
+        print(f"Fetch audio exception: {e}")
+
+
+def main():
+    try:
+        # STEP 1: Create a Deepgram client using the API key from environment variables
+        deepgram: DeepgramClient = DeepgramClient()

+        # STEP 2: Call the stream_raw method on the speak property
+        options = SpeakOptions(
+            model="aura-asteria-en",
+            encoding="linear16",
+            container="none",
+            sample_rate=48000,
+        )
+
+        response = deepgram.speak.rest.v("1").stream_raw(SPEAK_TEXT, options)
+
+        # Display response headers
+        print("Response headers:")
+        for header in response.headers:
+            print(f"{header}: {response.headers[header]}")
+
+        # Create and start a separate thread for fetching audio
+        fetch_thread = threading.Thread(target=fetch_audio, args=(response,))
+        fetch_thread.start()
+
+        # Play audio data from the queue
+        while True:
+            audio_data = audio_queue.get()
+            if audio_data is None:
+                break  # End of stream
+
+            # Play audio data using sounddevice
+            sd.play(audio_data, samplerate=48000)
+            sd.wait()  # Wait for the audio to finish playing
+
+        fetch_thread.join()
+
+        print("Audio playback finished.")
+
+    except Exception as e:
+        print(f"Exception: {e}")
+
+
+if __name__ == "__main__":
+    main()
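
Reviewer note: the sync-client change works because httpx only allows a streamed response body to be read while the client that produced it is still open; the previous with-block closed the client on exit, so the caller could never iterate the stream returned by send(req, stream=True). Below is a minimal sketch of the underlying httpx pattern the fix relies on. The URL, headers, and payload are placeholders rather than Deepgram's actual endpoint, and the caller is assumed to be responsible for closing the response when finished, as the new hello_world_play example does implicitly by consuming the stream to completion.

    # Sketch only: streaming with an httpx client that is not wrapped in a
    # context manager. The endpoint, headers, and payload are placeholders.
    import httpx

    client = httpx.Client(timeout=httpx.Timeout(30.0))
    req = client.build_request(
        "POST",
        "https://example.com/tts",      # placeholder URL
        headers={"Accept": "audio/*"},  # placeholder headers
        json={"text": "Hello world!"},
    )
    response = client.send(req, stream=True)  # body is not downloaded yet

    audio = bytearray()
    try:
        for chunk in response.iter_bytes():  # readable only while client is open
            audio.extend(chunk)              # consume the audio bytes
    finally:
        response.close()  # the caller closes the stream when done
        client.close()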