Merge pull request #448 from dvonthenen/fix-httpx-streaming

Fix HTTPX Streaming
deepgram · Aug 14, 2024 · fff2d0e · fff2d0e
2 parents 91fc36d + f903063
commit fff2d0e
Show file tree

Hide file tree

Showing 4 changed files with 109 additions and 5 deletions.
diff --git a/deepgram/clients/abstract_async_client.py b/deepgram/clients/abstract_async_client.py
@@ -348,7 +348,6 @@ async def _handle_request_raw(
             client = httpx.AsyncClient(timeout=timeout, transport=transport)
             if transport:
                 kwargs.pop("transport")
-            kwargs.pop("transport")
             req = client.build_request(method, _url, headers=_headers, **kwargs)
             return await client.send(req, stream=True)
 

diff --git a/deepgram/clients/abstract_sync_client.py b/deepgram/clients/abstract_sync_client.py
@@ -337,9 +337,9 @@ def _handle_request_raw(
 
         try:
             transport = kwargs.get("transport")
-            with httpx.Client(timeout=timeout, transport=transport) as client:
-                if transport:
-                    kwargs.pop("transport")
+            client = httpx.Client(timeout=timeout, transport=transport)
+            if transport:
+                kwargs.pop("transport")
             req = client.build_request(method, _url, headers=_headers, **kwargs)
             return client.send(req, stream=True)
 

diff --git a/examples/requirements-examples.txt b/examples/requirements-examples.txt
@@ -4,4 +4,6 @@
 python-dotenv
 
 # streaming libs
-pyaudio
+pyaudio
+sounddevice==0.4.7
+numpy==2.0.1
diff --git a/examples/text-to-speech/rest/raw/hello_world_play/main.py b/examples/text-to-speech/rest/raw/hello_world_play/main.py
@@ -0,0 +1,103 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import sounddevice as sd
+import numpy as np
+import queue
+import threading
+
+from deepgram import (
+    DeepgramClient,
+    SpeakOptions,
+)
+
+SPEAK_TEXT = {"text": "Hello world!"}
+
+
+# Define a queue to manage audio data
+audio_queue = queue.Queue(maxsize=20)  # Adjust size as needed
+
+element_size = np.dtype(np.int16).itemsize  # Element size for np.int16 (16-bit integer)
+CHUNK_SIZE = 32768  # Desired size of each audio chunk in bytes
+
+
+def fetch_audio(response):
+    try:
+        buffer = bytearray()  # Buffer to accumulate data
+        for data in response.iter_bytes():
+            buffer.extend(data)  # Add incoming data to buffer
+            while len(buffer) >= CHUNK_SIZE:
+                # Extract a chunk of the desired size
+                chunk = buffer[:CHUNK_SIZE]
+                buffer = buffer[CHUNK_SIZE:]  # Remove the chunk from the buffer
+
+                # Ensure the chunk is aligned to the element size
+                buffer_size = len(chunk) - (len(chunk) % element_size)
+
+                if buffer_size > 0:
+                    audio_data = np.frombuffer(chunk[:buffer_size], dtype=np.int16)
+                    audio_queue.put(audio_data)
+                    print(
+                        f"Queued audio data of size: {audio_data.size * element_size} bytes"
+                    )
+
+        # Process any remaining data in the buffer
+        if buffer:
+            audio_data = np.frombuffer(buffer, dtype=np.int16)
+            audio_queue.put(audio_data)
+            print(
+                f"Queued remaining audio data of size: {audio_data.size * element_size} bytes"
+            )
+
+        # Signal the end of the stream
+        audio_queue.put(None)
+        print("End of audio stream.")
+    except Exception as e:
+        print(f"Fetch audio exception: {e}")
+
+
+def main():
+    try:
+        # STEP 1: Create a Deepgram client using the API key from environment variables
+        deepgram: DeepgramClient = DeepgramClient()
+
+        # STEP 2: Call the save method on the speak property
+        options = SpeakOptions(
+            model="aura-asteria-en",
+            encoding="linear16",
+            container="none",
+            sample_rate=48000,
+        )
+
+        response = deepgram.speak.rest.v("1").stream_raw(SPEAK_TEXT, options)
+
+        # Display response headers
+        print("Response headers:")
+        for header in response.headers:
+            print(f"{header}: {response.headers[header]}")
+
+        # Create and start a separate thread for fetching audio
+        fetch_thread = threading.Thread(target=fetch_audio, args=(response,))
+        fetch_thread.start()
+
+        # Play audio data from the queue
+        while True:
+            audio_data = audio_queue.get()
+            if audio_data is None:
+                break  # End of stream
+
+            # Play audio data using sounddevice
+            sd.play(audio_data, samplerate=48000)
+            sd.wait()  # Wait for the audio to finish playing
+
+        fetch_thread.join()
+
+        print("Audio playback finished.")
+
+    except Exception as e:
+        print(f"Exception: {e}")
+
+
+if __name__ == "__main__":
+    main()