From 86421501027bb657fa332ab7c2d5a22a63addc11 Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 09:46:06 -0400 Subject: [PATCH 1/8] Deprecate tiers and add fully qualified models --- src/transcription/prerecorded/options.rs | 303 ++++++++++++++--------- 1 file changed, 192 insertions(+), 111 deletions(-) diff --git a/src/transcription/prerecorded/options.rs b/src/transcription/prerecorded/options.rs index 6a795db8..ade09450 100644 --- a/src/transcription/prerecorded/options.rs +++ b/src/transcription/prerecorded/options.rs @@ -9,7 +9,6 @@ use serde::{ser::SerializeSeq, Serialize}; /// Used as a parameter for [`Transcription::prerecorded`](crate::transcription::Transcription::prerecorded) and similar functions. #[derive(Debug, PartialEq, Clone)] pub struct Options { - tier: Option, model: Option, version: Option, language: Option, @@ -30,21 +29,6 @@ pub struct Options { detect_language: Option, } -/// Used as a parameter for [`OptionsBuilder::tier`]. -/// -/// See the [Deepgram Tier feature docs][docs] for more info. -/// -/// [docs]: https://developers.deepgram.com/documentation/features/tier/ -#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] -#[non_exhaustive] -pub enum Tier { - #[allow(missing_docs)] - Enhanced, - - #[allow(missing_docs)] - Base, -} - /// Used as a parameter for [`OptionsBuilder::model`] and [`OptionsBuilder::multichannel_with_models`]. /// /// See the [Deepgram Model feature docs][docs] for more info. @@ -53,24 +37,141 @@ pub enum Tier { #[derive(Debug, PartialEq, Eq, Clone, Hash)] #[non_exhaustive] pub enum Model { + + /// Recommended for readability and Deepgram's lowest word error rates. + /// Recommended for most use cases. + /// + /// Nova-2 expands on Nova-1's advancements with speech-specific + /// optimizations to the underlying Transformer architecture, advanced + /// data curation techniques, and a multi-stage training methodology. + /// These changes yield reduced word error rate (WER) and enhancements + /// to entity recognition (i.e. proper nouns, alphanumerics, etc.), + /// punctuation, and capitalization. + Nova2, + + /// Recommended for readability and low word error rates. + /// + /// Nova is the predecessor to Nova-2. Training on this model spans over + /// 100 domains and 47 billion tokens, making it the deepest-trained + /// automatic speech-to-text model to date. Nova doesn't just excel in one + /// specific domain — it is ideal for a wide array of voice applications + /// that require high accuracy in diverse contexts. See the benchmarks. + Nova, + + /// Recommended for lower word error rates than Base, high accuracy + /// timestamps, and use cases that require keyword boosting. + Enhanced, + + /// Recommended for large transcription volumes and high accuracy + /// timestamps. + Base, + + #[allow(missing_docs)] + Nova2Meeting, + + #[allow(missing_docs)] + Nova2Phonecall, + + #[allow(missing_docs)] + Nova2Finance, + + #[allow(missing_docs)] + Nova2Conversationalai, + + #[allow(missing_docs)] + Nova2Voicemail, + + #[allow(missing_docs)] + Nova2Video, + + #[allow(missing_docs)] + Nova2Medical, + + #[allow(missing_docs)] + Nova2Drivethru, + + #[allow(missing_docs)] + Nova2Automotive, + + #[allow(missing_docs)] + NovaPhonecall, + + #[allow(missing_docs)] + NovaMedical, + + #[allow(missing_docs)] + EnhancedMeeting, + + #[allow(missing_docs)] + EnhancedPhonecall, + + #[allow(missing_docs)] + EnhancedFinance, + + #[allow(missing_docs)] + BaseMeeting, + + #[allow(missing_docs)] + BasePhonecall, + + #[allow(missing_docs)] + BaseVoicemail, + + #[allow(missing_docs)] + BaseFinance, + + #[allow(missing_docs)] + BaseConversationalai, + + #[allow(missing_docs)] + BaseVideo, + + #[deprecated( + since = "0.5.0", + note = "use one of the general-purpose models like Model::Nova2 instead" + )] #[allow(missing_docs)] General, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Meeting instead" + )] #[allow(missing_docs)] Meeting, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Phonecall instead" + )] #[allow(missing_docs)] Phonecall, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Voicemail instead" + )] #[allow(missing_docs)] Voicemail, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Finance instead" + )] #[allow(missing_docs)] Finance, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Conversationalai instead" + )] #[allow(missing_docs)] Conversationalai, + #[deprecated( + since = "0.5.0", + note = "use one of the qualified models like Model::Nova2Video instead" + )] #[allow(missing_docs)] Video, @@ -265,7 +366,6 @@ impl OptionsBuilder { /// Construct a new [`OptionsBuilder`]. pub fn new() -> Self { Self(Options { - tier: None, model: None, version: None, language: None, @@ -287,30 +387,6 @@ impl OptionsBuilder { }) } - /// Set the Tier feature. - /// - /// Not all tiers are supported for all models and languages. - /// For a list of models/languages and their supported models, - /// see the [Deepgram Language feature][language] docs. - /// - /// See the [Deepgram Tier feature docs][docs] for more info. - /// - /// [language]: https://developers.deepgram.com/documentation/features/language/ - /// [docs]: https://developers.deepgram.com/documentation/features/tier/ - /// - /// # Examples - /// - /// ``` - /// # use deepgram::transcription::prerecorded::options::{Options, Tier}; - /// # - /// let options = Options::builder() - /// .tier(Tier::Enhanced) - /// .build(); - /// ``` - pub fn tier(mut self, tier: Tier) -> Self { - self.0.tier = Some(tier); - self - } /// Set the Model feature. /// @@ -331,7 +407,7 @@ impl OptionsBuilder { /// # use deepgram::transcription::prerecorded::options::{Model, Options}; /// # /// let options = Options::builder() - /// .model(Model::General) + /// .model(Model::Nova2) /// .build(); /// ``` /// @@ -339,13 +415,13 @@ impl OptionsBuilder { /// # use deepgram::transcription::prerecorded::options::{Model, Options}; /// # /// let options1 = Options::builder() - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) - /// .model(Model::General) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) + /// .model(Model::Nova2) /// .build(); /// /// let options2 = Options::builder() /// .multichannel(true) - /// .model(Model::General) + /// .model(Model::Nova2) /// .build(); /// /// assert_eq!(options1, options2); @@ -547,13 +623,13 @@ impl OptionsBuilder { /// # use deepgram::transcription::prerecorded::options::{Model, Options}; /// # /// let options1 = Options::builder() - /// .model(Model::General) - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) + /// .model(Model::Nova2) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) /// .multichannel(true) /// .build(); /// /// let options2 = Options::builder() - /// .model(Model::General) + /// .model(Model::Nova2) /// .multichannel(true) /// .build(); /// @@ -612,12 +688,12 @@ impl OptionsBuilder { /// let dg_transcription = dg_client.transcription(); /// /// let options1 = Options::builder() - /// .model(Model::General) - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) + /// .model(Model::Nova2) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) /// .build(); /// /// let options2 = Options::builder() - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) /// .build(); /// /// let request1 = dg_transcription @@ -643,13 +719,13 @@ impl OptionsBuilder { /// # use deepgram::transcription::prerecorded::options::{Model, Options}; /// # /// let options1 = Options::builder() - /// .model(Model::General) - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) + /// .model(Model::Nova2) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) /// .multichannel(true) /// .build(); /// /// let options2 = Options::builder() - /// .model(Model::General) + /// .model(Model::Nova2) /// .multichannel(true) /// .build(); /// @@ -660,12 +736,12 @@ impl OptionsBuilder { /// # use deepgram::transcription::prerecorded::options::{Model, Options}; /// # /// let options1 = Options::builder() - /// .multichannel_with_models([Model::Meeting]) - /// .multichannel_with_models([Model::Phonecall]) + /// .multichannel_with_models([Model::Nova2Meeting]) + /// .multichannel_with_models([Model::Nova2Phonecall]) /// .build(); /// /// let options2 = Options::builder() - /// .multichannel_with_models([Model::Meeting, Model::Phonecall]) + /// .multichannel_with_models([Model::Nova2Meeting, Model::Nova2Phonecall]) /// .build(); /// /// assert_eq!(options1, options2); @@ -1095,7 +1171,6 @@ impl Serialize for SerializableOptions<'_> { // Destructuring it makes sure that we don't forget to use any of it let Options { - tier, model, version, language, @@ -1116,10 +1191,6 @@ impl Serialize for SerializableOptions<'_> { detect_language, } = self.0; - if let Some(tier) = tier { - seq.serialize_element(&("tier", tier.as_ref()))?; - } - match multichannel { // Multichannels with models is enabled // Ignore self.model field @@ -1235,30 +1306,49 @@ impl Serialize for SerializableOptions<'_> { } } -impl AsRef for Tier { - fn as_ref(&self) -> &str { - use Tier::*; - - match self { - Enhanced => "enhanced", - Base => "base", - } - } -} impl AsRef for Model { fn as_ref(&self) -> &str { - use Model::*; - match self { - General => "general", - Meeting => "meeting", - Phonecall => "phonecall", - Voicemail => "voicemail", - Finance => "finance", - Conversationalai => "conversationalai", - Video => "video", - CustomId(id) => id, + Self::Nova2 => "nova-2", + Self::Nova => "nova", + Self::Enhanced => "enhanced", + Self::Base => "base", + Self::Nova2Meeting => "nova-2-meeting", + Self::Nova2Phonecall => "nova-2-phonecall", + Self::Nova2Finance => "nova-2-finance", + Self::Nova2Conversationalai => "nova-2-conversationalai", + Self::Nova2Voicemail => "nova-2-voicemail", + Self::Nova2Video => "nova-2-video", + Self::Nova2Medical => "nova-2-medical", + Self::Nova2Drivethru => "nova-2-drivethru", + Self::Nova2Automotive => "nova-2-automotive", + Self::NovaPhonecall => "nova-phonecall", + Self::NovaMedical => "nova-medical", + Self::EnhancedMeeting => "enhanced-meeting", + Self::EnhancedPhonecall => "enhanced-phonecall", + Self::EnhancedFinance => "enhanced-finance", + Self::BaseMeeting => "base-meeting", + Self::BasePhonecall => "base-phonecall", + Self::BaseVoicemail => "base-voicemail", + Self::BaseFinance => "base-finance", + Self::BaseConversationalai => "base-conversationalai", + Self::BaseVideo => "base-video", + #[allow(deprecated)] + Self::General => "general", + #[allow(deprecated)] + Self::Phonecall => "phonecall", + #[allow(deprecated)] + Self::Voicemail => "voicemail", + #[allow(deprecated)] + Self::Finance => "finance", + #[allow(deprecated)] + Self::Meeting => "meeting", + #[allow(deprecated)] + Self::Conversationalai => "conversationalai", + #[allow(deprecated)] + Self::Video => "video", + Self::CustomId(id) => id, } } } @@ -1323,7 +1413,7 @@ fn models_to_string(models: &[Model]) -> String { #[cfg(test)] mod models_to_string_tests { - use super::{Model::*, *}; + use super::*; #[test] fn empty() { @@ -1332,14 +1422,18 @@ mod models_to_string_tests { #[test] fn one() { - assert_eq!(models_to_string(&[General]), "general"); + assert_eq!(models_to_string(&[Model::Base]), "base"); } #[test] fn many() { assert_eq!( - models_to_string(&[Phonecall, Meeting, Voicemail]), - "phonecall:meeting:voicemail" + models_to_string(&[ + Model::BasePhonecall, + Model::BaseMeeting, + Model::BaseVoicemail + ]), + "base-phonecall:base-meeting:base-voicemail" ); } @@ -1347,11 +1441,11 @@ mod models_to_string_tests { fn custom() { assert_eq!( models_to_string(&[ - Finance, - CustomId(String::from("extra_crispy")), - Conversationalai + Model::EnhancedFinance, + Model::CustomId(String::from("extra_crispy")), + Model::Nova2Conversationalai, ]), - "finance:extra_crispy:conversationalai" + "enhanced-finance:extra_crispy:nova-2-conversationalai" ); } } @@ -1400,8 +1494,7 @@ mod serialize_options_tests { #[test] fn all_options() { let options = Options::builder() - .tier(Tier::Enhanced) - .model(Model::General) + .model(Model::Base) .version("1.2.3") .language(Language::en_US) .punctuate(true) @@ -1410,9 +1503,9 @@ mod serialize_options_tests { .diarize(true) .ner(true) .multichannel_with_models([ - Model::Finance, + Model::EnhancedFinance, Model::CustomId(String::from("extra_crispy")), - Model::Conversationalai, + Model::Nova2Conversationalai, ]) .alternatives(4) .numerals(true) @@ -1430,25 +1523,13 @@ mod serialize_options_tests { .tag(["Tag 1"]) .build(); - check_serialization(&options, "tier=enhanced&model=finance%3Aextra_crispy%3Aconversationalai&version=1.2.3&language=en-US&punctuate=true&profanity_filter=true&redact=pci&redact=ssn&diarize=true&ner=true&multichannel=true&alternatives=4&numerals=true&search=Rust&search=Deepgram&replace=Aaron%3AErin&keywords=Ferris&keywords=Cargo%3A-1.5&utterances=true&utt_split=0.9&tag=Tag+1"); + check_serialization(&options, "model=enhanced-finance%3Aextra_crispy%3Anova-2-conversationalai&version=1.2.3&language=en-US&punctuate=true&profanity_filter=true&redact=pci&redact=ssn&diarize=true&ner=true&multichannel=true&alternatives=4&numerals=true&search=Rust&search=Deepgram&replace=Aaron%3AErin&keywords=Ferris&keywords=Cargo%3A-1.5&utterances=true&utt_split=0.9&tag=Tag+1"); } - #[test] - fn tier() { - check_serialization( - &Options::builder().tier(Tier::Enhanced).build(), - "tier=enhanced", - ); - - check_serialization(&Options::builder().tier(Tier::Base).build(), "tier=base"); - } #[test] fn model() { - check_serialization( - &Options::builder().model(Model::General).build(), - "model=general", - ); + check_serialization(&Options::builder().model(Model::Base).build(), "model=base"); check_serialization( &Options::builder() @@ -1565,12 +1646,12 @@ mod serialize_options_tests { check_serialization( &Options::builder() .multichannel_with_models([ - Model::Finance, + Model::EnhancedFinance, Model::CustomId(String::from("extra_crispy")), - Model::Conversationalai, + Model::Nova2Conversationalai, ]) .build(), - "model=finance%3Aextra_crispy%3Aconversationalai&multichannel=true", + "model=enhanced-finance%3Aextra_crispy%3Anova-2-conversationalai&multichannel=true", ); } From cdb6887e5d337bc0b8b59d948adcee06bf63cc0f Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 09:52:23 -0400 Subject: [PATCH 2/8] Add all currently supported languages to the Language enum --- src/transcription/prerecorded/options.rs | 185 ++++++++++++++++++----- 1 file changed, 146 insertions(+), 39 deletions(-) diff --git a/src/transcription/prerecorded/options.rs b/src/transcription/prerecorded/options.rs index ade09450..9472582d 100644 --- a/src/transcription/prerecorded/options.rs +++ b/src/transcription/prerecorded/options.rs @@ -189,16 +189,25 @@ pub enum Model { #[non_exhaustive] pub enum Language { #[allow(missing_docs)] - zh, + bg, #[allow(missing_docs)] - zh_CN, + ca, #[allow(missing_docs)] - zh_TW, + cs, #[allow(missing_docs)] - nl, + da, + + #[allow(missing_docs)] + de, + + #[allow(missing_docs)] + de_CH, + + #[allow(missing_docs)] + el, #[allow(missing_docs)] en, @@ -219,13 +228,25 @@ pub enum Language { en_US, #[allow(missing_docs)] - fr, + es, #[allow(missing_docs)] - fr_CA, + es_419, #[allow(missing_docs)] - de, + es_LATAM, + + #[allow(missing_docs)] + et, + + #[allow(missing_docs)] + fi, + + #[allow(missing_docs)] + fr, + + #[allow(missing_docs)] + fr_CA, #[allow(missing_docs)] hi, @@ -233,6 +254,9 @@ pub enum Language { #[allow(missing_docs)] hi_Latn, + #[allow(missing_docs)] + hu, + #[allow(missing_docs)] id, @@ -245,6 +269,30 @@ pub enum Language { #[allow(missing_docs)] ko, + #[allow(missing_docs)] + ko_KR, + + #[allow(missing_docs)] + lv, + + #[allow(missing_docs)] + lt, + + #[allow(missing_docs)] + ms, + + #[allow(missing_docs)] + nl, + + #[allow(missing_docs)] + nl_BE, + + #[allow(missing_docs)] + no, + + #[allow(missing_docs)] + pl, + #[allow(missing_docs)] pt, @@ -252,23 +300,56 @@ pub enum Language { pt_BR, #[allow(missing_docs)] - ru, + ro, #[allow(missing_docs)] - es, + ru, #[allow(missing_docs)] - es_419, + sk, #[allow(missing_docs)] sv, + #[allow(missing_docs)] + sv_SE, + + #[allow(missing_docs)] + ta, + + #[allow(missing_docs)] + taq, + + #[allow(missing_docs)] + th, + + #[allow(missing_docs)] + th_TH, + #[allow(missing_docs)] tr, #[allow(missing_docs)] uk, + #[allow(missing_docs)] + vi, + + #[allow(missing_docs)] + zh, + + #[allow(missing_docs)] + zh_CN, + + #[allow(missing_docs)] + zh_Hans, + + #[allow(missing_docs)] + zh_Hant, + + #[allow(missing_docs)] + zh_TW, + /// Avoid using the `Other` variant where possible. /// It exists so that you can use new languages that Deepgram supports without being forced to update your version of the SDK. /// See the [Deepgram Language feature docs][docs] for the most up-to-date list of supported languages. @@ -1355,37 +1436,63 @@ impl AsRef for Model { impl AsRef for Language { fn as_ref(&self) -> &str { - use Language::*; match self { - zh => "zh", - zh_CN => "zh-CN", - zh_TW => "zh-TW", - nl => "nl", - en => "en", - en_AU => "en-AU", - en_GB => "en-GB", - en_IN => "en-IN", - en_NZ => "en-NZ", - en_US => "en-US", - fr => "fr", - fr_CA => "fr-CA", - de => "de", - hi => "hi", - hi_Latn => "hi-Latn", - id => "id", - it => "it", - ja => "ja", - ko => "ko", - pt => "pt", - pt_BR => "pt-BR", - ru => "ru", - es => "es", - es_419 => "es-419", - sv => "sv", - tr => "tr", - uk => "uk", - Other(bcp_47_tag) => bcp_47_tag, + Self::bg => "bg", + Self::ca => "ca", + Self::cs => "cs", + Self::da => "da", + Self::de => "de", + Self::de_CH => "de-CH", + Self::el => "el", + Self::en => "en", + Self::en_AU => "en-AU", + Self::en_GB => "en-GB", + Self::en_IN => "en-IN", + Self::en_NZ => "en-NZ", + Self::en_US => "en-US", + Self::es => "es", + Self::es_419 => "es-419", + Self::es_LATAM => "es-LATAM", + Self::et => "et", + Self::fi => "fi", + Self::fr => "fr", + Self::fr_CA => "fr-CA", + Self::hi => "hi", + Self::hi_Latn => "hi-Latn", + Self::hu => "hu", + Self::id => "id", + Self::it => "it", + Self::ja => "ja", + Self::ko => "ko", + Self::ko_KR => "ko-KR", + Self::lv => "lv", + Self::lt => "lt", + Self::ms => "ms", + Self::nl => "nl", + Self::nl_BE => "nl-BE", + Self::no => "no", + Self::pl => "pl", + Self::pt => "pt", + Self::pt_BR => "pt-BR", + Self::ro => "ro", + Self::ru => "ru", + Self::sk => "sk", + Self::sv => "sv", + Self::sv_SE => "sv-SE", + Self::ta => "ta", + Self::taq => "taq", + Self::th => "th", + Self::th_TH => "th-TH", + Self::tr => "tr", + Self::uk => "uk", + Self::vi => "vi", + Self::zh => "zh", + Self::zh_CN => "zh-CN", + Self::zh_Hans => "zh-Hans", + Self::zh_Hant => "zh-Hant", + Self::zh_TW => "zh-TW", + Self::Other(bcp_47_tag) => bcp_47_tag, } } } From dfa49e85ddbcf35b0bc2e7670eccf6c6bac3e5e9 Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 11:00:47 -0400 Subject: [PATCH 3/8] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b686bc1..2ee7c0ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Deprecate tiers and add explicit support for all currently available models. ## [0.4.0] - 2023-11-01 From 4542c38901e4358b87b23559f120556e7c13a9fc Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 11:03:13 -0400 Subject: [PATCH 4/8] Add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ee7c0ac..2387daf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - Deprecate tiers and add explicit support for all currently available models. +- Expand language enum to include all currently-supported languages. ## [0.4.0] - 2023-11-01 From aa56381f465cddfde1bf3ca215f1002d7aa7626c Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 11:20:33 -0400 Subject: [PATCH 5/8] fmt --- src/transcription/prerecorded/options.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/transcription/prerecorded/options.rs b/src/transcription/prerecorded/options.rs index ade09450..d9a28dea 100644 --- a/src/transcription/prerecorded/options.rs +++ b/src/transcription/prerecorded/options.rs @@ -37,7 +37,6 @@ pub struct Options { #[derive(Debug, PartialEq, Eq, Clone, Hash)] #[non_exhaustive] pub enum Model { - /// Recommended for readability and Deepgram's lowest word error rates. /// Recommended for most use cases. /// @@ -387,7 +386,6 @@ impl OptionsBuilder { }) } - /// Set the Model feature. /// /// Not all models are supported for all languages. For a list of languages and their supported models, see @@ -1306,7 +1304,6 @@ impl Serialize for SerializableOptions<'_> { } } - impl AsRef for Model { fn as_ref(&self) -> &str { match self { @@ -1526,7 +1523,6 @@ mod serialize_options_tests { check_serialization(&options, "model=enhanced-finance%3Aextra_crispy%3Anova-2-conversationalai&version=1.2.3&language=en-US&punctuate=true&profanity_filter=true&redact=pci&redact=ssn&diarize=true&ner=true&multichannel=true&alternatives=4&numerals=true&search=Rust&search=Deepgram&replace=Aaron%3AErin&keywords=Ferris&keywords=Cargo%3A-1.5&utterances=true&utt_split=0.9&tag=Tag+1"); } - #[test] fn model() { check_serialization(&Options::builder().model(Model::Base).build(), "model=base"); From 564d8b43ed68308c3f043a0b7735e6b9501a49c1 Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 11:33:21 -0400 Subject: [PATCH 6/8] Fix minimal tokio version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3db6f109..e6758a01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["json", " serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "1" -tokio = { version = "1", features = ["full"] } +tokio = { version = "1.13.0", features = ["full"] } tokio-tungstenite = { version = "0.20.1", features = ["rustls-tls-webpki-roots"] } tokio-util = { version = "0.7.1", features = ["codec", "io"] } tungstenite = "0.20.1" From 54f7b7657e95242bf097a8da9f3f6d6510e4fd40 Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 11:56:48 -0400 Subject: [PATCH 7/8] fmt --- src/transcription/prerecorded/options.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transcription/prerecorded/options.rs b/src/transcription/prerecorded/options.rs index 43240763..cedf0de1 100644 --- a/src/transcription/prerecorded/options.rs +++ b/src/transcription/prerecorded/options.rs @@ -1433,7 +1433,6 @@ impl AsRef for Model { impl AsRef for Language { fn as_ref(&self) -> &str { - match self { Self::bg => "bg", Self::ca => "ca", From 8f9b6f9792a8f04edbec3b68adf951f8cfe5a695 Mon Sep 17 00:00:00 2001 From: Cliff Dyer Date: Wed, 3 Jul 2024 16:12:42 -0400 Subject: [PATCH 8/8] Featurize transcription modes (#68) Add feature flags for prerecorded and live transcription --- .github/workflows/ci.yaml | 14 ++++++++++++++ CHANGELOG.md | 1 + Cargo.toml | 33 +++++++++++++++++++++++++++++++-- src/lib.rs | 2 ++ src/transcription.rs | 4 +++- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 42dab37c..13bb40bb 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,6 +7,20 @@ env: RUSTDOCFLAGS: -D warnings jobs: + Features: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install libasound2-dev + run: | + sudo apt-get update + sudo apt-get install libasound2-dev + - name: Check no features + run: cargo check --all-targets --no-default-features + - name: Check prerecorded feature + run: cargo check --all-targets --no-default-features --features=prerecorded + - name: Check live feature + run: cargo check --all-targets --no-default-features --features=live Build: runs-on: ubuntu-latest steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 2387daf3..9db4fb3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - Deprecate tiers and add explicit support for all currently available models. - Expand language enum to include all currently-supported languages. +- Add (default on) feature flags for live and prerecorded transcription. ## [0.4.0] - 2023-11-01 diff --git a/Cargo.toml b/Cargo.toml index e6758a01..6b702b21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,9 +22,9 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "1" tokio = { version = "1.13.0", features = ["full"] } -tokio-tungstenite = { version = "0.20.1", features = ["rustls-tls-webpki-roots"] } +tokio-tungstenite = { version = "0.20.1", features = ["rustls-tls-webpki-roots"], optional = true } tokio-util = { version = "0.7.1", features = ["codec", "io"] } -tungstenite = "0.20.1" +tungstenite = { version = "0.20.1", optional = true } url = "2" uuid = { version = "1", features = ["serde"] } # Dependencies below are specified only to satisfy minimal-versions. @@ -34,3 +34,32 @@ proc-macro2 = "1.0.60" pkg-config = "0.3.27" cpal = "0.13" crossbeam = "0.8" + +[features] +default = ["prerecorded", "live"] +live = ["dep:tungstenite", "dep:tokio-tungstenite"] +prerecorded = [] + +[[example]] +name = "prerecorded_from_file" +required-features = ["prerecorded"] + +[[example]] +name = "callback" +required-features = ["prerecorded"] + +[[example]] +name = "make_prerecorded_request_builder" +required-features = ["prerecorded"] + +[[example]] +name = "microphone_stream" +required-features = ["live"] + +[[example]] +name = "prerecorded_from_url" +required-features = ["prerecorded"] + +[[example]] +name = "simple_stream" +required-features = ["live"] diff --git a/src/lib.rs b/src/lib.rs index f0839b9e..f6635679 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ mod response; /// Make transcriptions requests using [`Deepgram::transcription`]. #[derive(Debug, Clone)] pub struct Deepgram { + #[cfg_attr(not(feature = "live"), allow(unused))] api_key: String, client: reqwest::Client, } @@ -65,6 +66,7 @@ pub enum DeepgramError { #[error("Something went wrong during I/O: {0}")] IoError(#[from] io::Error), + #[cfg(feature = "live")] /// Something went wrong with WS. #[error("Something went wrong with WS: {0}")] WsError(#[from] tungstenite::Error), diff --git a/src/transcription.rs b/src/transcription.rs index 6a576e79..847ec47d 100644 --- a/src/transcription.rs +++ b/src/transcription.rs @@ -6,7 +6,9 @@ use crate::Deepgram; +#[cfg(feature = "live")] pub mod live; +#[cfg(feature = "prerecorded")] pub mod prerecorded; /// Transcribe audio using Deepgram's automated speech recognition. @@ -17,7 +19,7 @@ pub mod prerecorded; /// /// [api]: https://developers.deepgram.com/api-reference/#transcription #[derive(Debug, Clone)] -pub struct Transcription<'a>(&'a Deepgram); +pub struct Transcription<'a>(#[allow(unused)] &'a Deepgram); impl Deepgram { /// Construct a new [`Transcription`] from a [`Deepgram`].