Create a low-level interface for websocket transcription (#82)

* Add low-level websocket handle to transcription api
* fmt
* Use more minimum-version friendly Bytes::to_vec
* Clean up changelog
* Make `Deepgram` constructors fallible.
* Ensure BASE_URL can be a base url.
* Fix message flow for websocket futures::Stream interface
* Improve documentation, audit unwraps, and add websocket close error type.
* expose internal error type
* update README
* fix doctests
* Capitalize token
* Handle None from stream
* Use tokio ReceiverStream to properly shut down File interface.
* warning to set API key in example

---------

Co-authored-by: Brent George <[email protected]>
deepgram · Aug 12, 2024 · 4821b91 · 4821b91
1 parent d45441c
commit 4821b91
Show file tree

Hide file tree

Showing 23 changed files with 728 additions and 264 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,13 +4,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.6.0] - 2024-07-23
+## [Unreleased](https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.6.0...HEAD)
+
+## [0.6.0](https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.5.0...0.6.0) - 2024-08-08
 
 ### Migrating from 0.4.0 -> 0.6.0
 
-Module Imports
+#### Module Imports
 
-```
+```rust
 use deepgram::{
 ---    transcription::prerecorded::{
 +++    common::{
@@ -21,10 +23,49 @@ use deepgram::{
 };
 ```
 
-Streaming Changes
+#### Streaming Changes
 
-Now you can pass Options using stream_request_with_options
+We have exposed a low-level, message-based interface to the websocket API:
+
+```rust
+use futures::select;
+
+let mut handle = dg
+    .transcription()
+    .stream_request()
+    .handle()
+    .await?;
+
+loop {
+    select! {
+        _ = tokio::time::sleep(Duration::from_secs(3)) => handle.keep_alive().await,
+        _ = handle.send_data(data_chunk()).fuse() => {}
+        response = handle.receive().fuse() => {
+            match response {
+                Some(response) => println!("{response:?}"),
+                None => break,
+            }
+        }
+    }
+}
+handle.close_stream().await;
 ```
+
+No need to call `.start()` to begin streaming data.
+
+```rust
+let mut results = dg
+    .transcription()
+    .stream_request_with_options(Some(&options))
+    .file(PATH_TO_FILE, AUDIO_CHUNK_SIZE, Duration::from_millis(16))
+---    .await
+---    .start()
+    .await;
+```
+
+Now you can pass `Options` using `stream_request_with_options`:
+
+```rust
 let options = Options::builder()
     .smart_format(true)
     .language(Language::en_US)
@@ -35,8 +76,6 @@ let mut results = dg
     .stream_request_with_options(Some(&options))
     .file(PATH_TO_FILE, AUDIO_CHUNK_SIZE, Duration::from_millis(16))
     .await?
-    .start()
-    .await?;
 ```
 
 Some Enums have changed and may need to be updated
@@ -48,6 +87,7 @@ Some Enums have changed and may need to be updated
 - Add Speech to Text
 - Reorganize Code
 
+
 ### Streaming Features
 - endpointing
 - utterance_end_ms
@@ -86,13 +126,14 @@ Some Enums have changed and may need to be updated
 - custom_topics
 - custom_topic_mode
 
-## [0.5.0]
+## [0.5.0](https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.4.0...0.5.0) - 2024-07-08
+
 - Deprecate tiers and add explicit support for all currently available models.
 - Expand language enum to include all currently-supported languages.
 - Add (default on) feature flags for live and prerecorded transcription.
 - Support arbitrary query params in transcription options.
 
-## [0.4.0] - 2023-11-01
+## [0.4.0](https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.3.0...0.4.0) - 2023-11-01
 
 ### Added
 - `detect_language` option.
@@ -101,7 +142,7 @@ Some Enums have changed and may need to be updated
 - Remove generic from `Deepgram` struct.
 - Upgrade dependencies: `tungstenite`, `tokio-tungstenite`, `reqwest`.
 
-## [0.3.0]
+## [0.3.0](https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.2.1...0.3.0) - 2023-07-26
 
 ### Added
 - Derive `Serialize` for all response types.
@@ -113,6 +154,3 @@ Some Enums have changed and may need to be updated
 ### Changed
 - Use Rustls instead of OpenSSL.
 
-[Unreleased]: https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.4.0...HEAD
-[0.4.0]: https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.3.0...0.4.0
-[0.3.0]: https://github.com/deepgram-devs/deepgram-rust-sdk/compare/0.2.1...0.3.0
diff --git a/Cargo.toml b/Cargo.toml
@@ -34,6 +34,8 @@ uuid = { version = "1", features = ["serde"] }
 # Dependencies below are specified only to satisfy minimal-versions.
 proc-macro2 = "1.0.60"
 pkg-config = { version = "0.3.30", optional = true }
+sha256 = "1.5.0"
+anyhow = "1.0.86"
 
 [dev-dependencies]
 cpal = "0.13"

diff --git a/examples/README.md b/examples/README.md
@@ -3,7 +3,7 @@
 ### Setting Env Vars
 
 ```sh
-export FILENAME=./examples/audio/Bueller-Life-moves-pretty-fast.mp3
+export FILENAME=./examples/audio/bueller.wav
 ```
 
 ### Running the examples
@@ -17,5 +17,21 @@ cargo run --example simple_stream
 ```
 
 ```sh
-cargo run --example advanced_stream
-```
+cargo run --example callback
+```
+
+```sh
+cargo run --example make_prerecorded_request_builder
+```
+
+```sh
+cargo run --example microphone_stream
+```
+
+```sh
+cargo run --example text_to_speech_to_file
+```
+
+```sh
+cargo run --example text_to_speech_to_stream
+```
diff --git a/examples/speak/rest/text_to_speech_to_file.rs b/examples/speak/rest/text_to_speech_to_file.rs
@@ -10,7 +10,7 @@ async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let options = Options::builder()
         .model(Model::AuraAsteriaEn)

diff --git a/examples/speak/rest/text_to_speech_to_stream.rs b/examples/speak/rest/text_to_speech_to_stream.rs
@@ -90,7 +90,7 @@ async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let sample_rate = 16000;
     let channels = 1;

diff --git a/examples/transcription/rest/callback.rs b/examples/transcription/rest/callback.rs
@@ -15,7 +15,7 @@ async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let source = AudioSource::from_url(AUDIO_URL);
 

diff --git a/examples/transcription/rest/make_prerecorded_request_builder.rs b/examples/transcription/rest/make_prerecorded_request_builder.rs
@@ -6,17 +6,17 @@ use deepgram::{
         batch_response::Response,
         options::{Language, Options},
     },
-    Deepgram,
+    Deepgram, DeepgramError,
 };
 
 static AUDIO_URL: &str = "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav";
 
 #[tokio::main]
-async fn main() -> reqwest::Result<()> {
+async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let source = AudioSource::from_url(AUDIO_URL);
 

diff --git a/examples/transcription/rest/prerecorded_from_file.rs b/examples/transcription/rest/prerecorded_from_file.rs
@@ -16,7 +16,7 @@ async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let file = File::open(PATH_TO_FILE).await.unwrap();
 

diff --git a/examples/transcription/rest/prerecorded_from_url.rs b/examples/transcription/rest/prerecorded_from_url.rs
@@ -15,7 +15,7 @@ async fn main() -> Result<(), DeepgramError> {
     let deepgram_api_key =
         env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let dg_client = Deepgram::new(&deepgram_api_key);
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let source = AudioSource::from_url(AUDIO_URL);
 

diff --git a/examples/transcription/websocket/microphone_stream.rs b/examples/transcription/websocket/microphone_stream.rs
@@ -90,9 +90,12 @@ fn microphone_as_stream() -> FuturesReceiver<Result<Bytes, RecvError>> {
 
 #[tokio::main]
 async fn main() -> Result<(), DeepgramError> {
-    let dg = Deepgram::new(env::var("DEEPGRAM_API_KEY").unwrap());
+    let deepgram_api_key =
+        env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
 
-    let mut results = dg
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
+
+    let mut results = dg_client
         .transcription()
         .stream_request()
         .keep_alive()
@@ -102,7 +105,6 @@ async fn main() -> Result<(), DeepgramError> {
         // TODO Specific to my machine, not general enough example.
         .channels(2)
         .stream(microphone_as_stream())
-        .start()
         .await?;
 
     while let Some(result) = results.next().await {

diff --git a/examples/transcription/websocket/simple_stream.rs b/examples/transcription/websocket/simple_stream.rs
@@ -10,17 +10,21 @@ use deepgram::{
 
 static PATH_TO_FILE: &str = "examples/audio/bueller.wav";
 static AUDIO_CHUNK_SIZE: usize = 3174;
+static FRAME_DELAY: Duration = Duration::from_millis(16);
 
 #[tokio::main]
 async fn main() -> Result<(), DeepgramError> {
-    let dg = Deepgram::new(env::var("DEEPGRAM_API_KEY").unwrap());
+    let deepgram_api_key =
+        env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environmental variable");
+
+    let dg_client = Deepgram::new(&deepgram_api_key)?;
 
     let options = Options::builder()
         .smart_format(true)
         .language(Language::en_US)
         .build();
 
-    let mut results = dg
+    let mut results = dg_client
         .transcription()
         .stream_request_with_options(options)
         .keep_alive()
@@ -32,9 +36,7 @@ async fn main() -> Result<(), DeepgramError> {
         .utterance_end_ms(1000)
         .vad_events(true)
         .no_delay(true)
-        .file(PATH_TO_FILE, AUDIO_CHUNK_SIZE, Duration::from_millis(16))
-        .await?
-        .start()
+        .file(PATH_TO_FILE, AUDIO_CHUNK_SIZE, FRAME_DELAY)
         .await?;
 
     while let Some(result) = results.next().await {

diff --git a/src/common/options.rs b/src/common/options.rs
@@ -50,6 +50,11 @@ pub struct Options {
     callback_method: Option<CallbackMethod>,
 }
 
+impl Default for Options {
+    fn default() -> Self {
+        Options::builder().build()
+    }
+}
 /// Detect Language value
 ///
 /// See the [Deepgram Detect Language feature docs][docs] for more info.
@@ -1018,10 +1023,10 @@ impl OptionsBuilder {
     /// #
     /// # static AUDIO_URL: &str = "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav";
     /// #
-    /// # fn main() -> Result<(), reqwest::Error> {
+    /// # fn main() -> Result<(), deepgram::DeepgramError> {
     /// # let deepgram_api_key = env::var("DEEPGRAM_API_KEY").unwrap_or_default();
     /// #
-    /// let dg_client = Deepgram::new(&deepgram_api_key);
+    /// let dg_client = Deepgram::new(&deepgram_api_key)?;
     /// let dg_transcription = dg_client.transcription();
     ///
     /// let options1 = Options::builder()
@@ -2365,7 +2370,7 @@ mod serialize_options_tests {
     fn check_serialization(options: &Options, expected: &str) {
         let deepgram_api_key = env::var("DEEPGRAM_API_KEY").unwrap_or_default();
 
-        let dg_client = Deepgram::new(deepgram_api_key);
+        let dg_client = Deepgram::new(deepgram_api_key).unwrap();
 
         let request = dg_client
             .transcription()