Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chrome extension update #123

Merged
merged 10 commits into from
Feb 3, 2024
2 changes: 1 addition & 1 deletion Audio-Transcription-Chrome/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function init_element() {

elem_container = document.createElement('div');
elem_container.id = "transcription";
elem_container.style.cssText = 'padding-top:16px;font-size:18px;line-height:18px;top:0px;position:absolute;width:500px;height:90px;opacity:0.9;z-index:100;background:black;border-radius:10px;color:white;';
elem_container.style.cssText = 'padding-top:16px;font-size:18px;position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%);z-index: 9999;line-height:18px;width:500px;height:90px;opacity:0.9;z-index:100;background:black;border-radius:10px;color:white;';

for (var i = 0; i < 4; i++) {
elem_text = document.createElement('span');
Expand Down
4 changes: 0 additions & 4 deletions Audio-Transcription-Chrome/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,10 @@ async function startRecord(option) {
const socket = new WebSocket(`ws://${option.host}:${option.port}/`);
let isServerReady = false;
let language = option.language;
if (language === null && !option.multilingual) {
language = 'en';
}
socket.onopen = function(e) {
socket.send(
JSON.stringify({
uid: uuid,
multilingual: option.multilingual,
language: option.language,
task: option.task,
model: option.modelSize
Expand Down
185 changes: 93 additions & 92 deletions Audio-Transcription-Chrome/popup.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,112 +15,109 @@
<input type="checkbox" id="useServerCheckbox">
<label for="useServerCheckbox">Use Collabora Whisper-Live Server</label>
</div>
<div class="checkbox-container">
<input type="checkbox" id="useMultilingualCheckbox">
<label for="useMultilingualCheckbox">Use Multilingual Model</label>
</div>
<div class="dropdown-container">
<label for="languageDropdown">Select Language:</label>
<select id="languageDropdown" disabled>
<option value="">Select Language</option>
<option value="zh">Chinese</option>
<option value="de">German</option>
<option value="es">Spanish</option>
<option value="ru">Russian</option>
<option value="ko">Korean</option>
<option value="fr">French</option>
<option value="ja">Japanese</option>
<option value="pt">Portuguese</option>
<option value="tr">Turkish</option>
<option value="pl">Polish</option>
<select id="languageDropdown">
<option value="" selected>Automatically detect</option>
<option value="af">Afrikaans</option>
<option value="sq">Albanian</option>
<option value="am">Amharic</option>
<option value="ar">Arabic</option>
<option value="hy">Armenian</option>
<option value="as">Assamese</option>
<option value="az">Azerbaijani</option>
<option value="ba">Bashkir</option>
<option value="eu">Basque</option>
<option value="be">Belarusian</option>
<option value="bn">Bengali</option>
<option value="bs">Bosnian</option>
<option value="br">Breton</option>
<option value="bg">Bulgarian</option>
<option value="ca">Catalan</option>
<option value="zh">Chinese</option>
<option value="hr">Croatian</option>
<option value="cs">Czech</option>
<option value="da">Danish</option>
<option value="nl">Dutch</option>
<option value="ar">Arabic</option>
<option value="sv">Swedish</option>
<option value="it">Italian</option>
<option value="id">Indonesian</option>
<option value="hi">Hindi</option>
<option value="en">English</option>
<option value="et">Estonian</option>
<option value="fo">Faroese</option>
<option value="fi">Finnish</option>
<option value="vi">Vietnamese</option>
<option value="he">Hebrew</option>
<option value="uk">Ukrainian</option>
<option value="fr">French</option>
<option value="gl">Galician</option>
<option value="ka">Georgian</option>
<option value="de">German</option>
<option value="el">Greek</option>
<option value="ms">Malay</option>
<option value="cs">Czech</option>
<option value="ro">Romanian</option>
<option value="da">Danish</option>
<option value="gu">Gujarati</option>
<option value="ht">Haitian Creole</option>
<option value="ha">Hausa</option>
<option value="haw">Hawaiian</option>
<option value="he">Hebrew</option>
<option value="hi">Hindi</option>
<option value="hu">Hungarian</option>
<option value="ta">Tamil</option>
<option value="no">Norwegian</option>
<option value="th">Thai</option>
<option value="ur">Urdu</option>
<option value="hr">Croatian</option>
<option value="bg">Bulgarian</option>
<option value="lt">Lithuanian</option>
<option value="is">Icelandic</option>
<option value="id">Indonesian</option>
<option value="it">Italian</option>
<option value="ja">Japanese</option>
<option value="jw">Javanese</option>
<option value="kn">Kannada</option>
<option value="kk">Kazakh</option>
<option value="km">Khmer</option>
<option value="ko">Korean</option>
<option value="lo">Lao</option>
<option value="la">Latin</option>
<option value="mi">Maori</option>
<option value="ml">Malayalam</option>
<option value="cy">Welsh</option>
<option value="sk">Slovak</option>
<option value="te">Telugu</option>
<option value="fa">Persian</option>
<option value="lv">Latvian</option>
<option value="bn">Bengali</option>
<option value="sr">Serbian</option>
<option value="az">Azerbaijani</option>
<option value="sl">Slovenian</option>
<option value="kn">Kannada</option>
<option value="et">Estonian</option>
<option value="ln">Lingala</option>
<option value="lt">Lithuanian</option>
<option value="lb">Luxembourgish</option>
<option value="mk">Macedonian</option>
<option value="br">Breton</option>
<option value="eu">Basque</option>
<option value="is">Icelandic</option>
<option value="hy">Armenian</option>
<option value="ne">Nepali</option>
<option value="mn">Mongolian</option>
<option value="bs">Bosnian</option>
<option value="kk">Kazakh</option>
<option value="sq">Albanian</option>
<option value="sw">Swahili</option>
<option value="gl">Galician</option>
<option value="mg">Malagasy</option>
<option value="ms">Malay</option>
<option value="ml">Malayalam</option>
<option value="mt">Maltese</option>
<option value="mi">Maori</option>
<option value="mr">Marathi</option>
<option value="pa">Punjabi</option>
<option value="si">Sinhala</option>
<option value="km">Khmer</option>
<option value="sn">Shona</option>
<option value="yo">Yoruba</option>
<option value="so">Somali</option>
<option value="af">Afrikaans</option>
<option value="mn">Mongolian</option>
<option value="my">Myanmar</option>
<option value="ne">Nepali</option>
<option value="no">Norwegian</option>
<option value="nn">Nynorsk</option>
<option value="oc">Occitan</option>
<option value="ka">Georgian</option>
<option value="be">Belarusian</option>
<option value="tg">Tajik</option>
<option value="sd">Sindhi</option>
<option value="gu">Gujarati</option>
<option value="am">Amharic</option>
<option value="yi">Yiddish</option>
<option value="lo">Lao</option>
<option value="uz">Uzbek</option>
<option value="fo">Faroese</option>
<option value="ht">Haitian Creole</option>
<option value="ps">Pashto</option>
<option value="tk">Turkmen</option>
<option value="nn">Nynorsk</option>
<option value="mt">Maltese</option>
<option value="fa">Persian</option>
<option value="pl">Polish</option>
<option value="pt">Portuguese</option>
<option value="pa">Punjabi</option>
<option value="ro">Romanian</option>
<option value="ru">Russian</option>
<option value="sa">Sanskrit</option>
<option value="lb">Luxembourgish</option>
<option value="my">Myanmar</option>
<option value="bo">Tibetan</option>
<option value="sr">Serbian</option>
<option value="sn">Shona</option>
<option value="sd">Sindhi</option>
<option value="si">Sinhala</option>
<option value="sk">Slovak</option>
<option value="sl">Slovenian</option>
<option value="so">Somali</option>
<option value="es">Spanish</option>
<option value="su">Sundanese</option>
<option value="sw">Swahili</option>
<option value="sv">Swedish</option>
<option value="tl">Tagalog</option>
<option value="mg">Malagasy</option>
<option value="as">Assamese</option>
<option value="tg">Tajik</option>
<option value="ta">Tamil</option>
<option value="tt">Tatar</option>
<option value="haw">Hawaiian</option>
<option value="ln">Lingala</option>
<option value="ha">Hausa</option>
<option value="ba">Bashkir</option>
<option value="jw">Javanese</option>
<option value="su">Sundanese</option>
<option value="te">Telugu</option>
<option value="th">Thai</option>
<option value="bo">Tibetan</option>
<option value="tr">Turkish</option>
<option value="tk">Turkmen</option>
<option value="uk">Ukrainian</option>
<option value="ur">Urdu</option>
<option value="uz">Uzbek</option>
<option value="vi">Vietnamese</option>
<option value="cy">Welsh</option>
<option value="yi">Yiddish</option>
<option value="yo">Yoruba</option>
</select>
</div>
<div class="dropdown-container">
Expand All @@ -134,11 +131,15 @@
<div class="dropdown-container">
<label for="modelSizeDropdown">Select Model Size:</label>
<select id="modelSizeDropdown">
<option value="">Select Task</option>
<option value="tiny">Tiny</option>
<option value="">Select model</option>
<option value="tiny">Tiny </option>
<option value="tiny.en">Tiny (English-only)</option>
<option value="base">Base</option>
<option value="base.en">Base (English-only)</option>
<option value="small" selected>Small</option>
<option value="small.en">Small (English-only)</option>
<option value="medium">Medium</option>
<option value="medium.en">Medium (English-only)</option>
<option value="large-v2">Large-v2</option>
<option value="large-v3">Large-v3</option>
</select>
Expand Down
25 changes: 1 addition & 24 deletions Audio-Transcription-Chrome/popup.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ document.addEventListener("DOMContentLoaded", function () {
const stopButton = document.getElementById("stopCapture");

const useServerCheckbox = document.getElementById("useServerCheckbox");
const useMultilingualCheckbox = document.getElementById('useMultilingualCheckbox');
const languageDropdown = document.getElementById('languageDropdown');
const taskDropdown = document.getElementById('taskDropdown');
const modelSizeDropdown = document.getElementById('modelSizeDropdown');
Expand Down Expand Up @@ -32,14 +31,6 @@ document.addEventListener("DOMContentLoaded", function () {
}
});

chrome.storage.local.get("useMultilingualModelState", ({ useMultilingualModelState }) => {
if (useMultilingualModelState !== undefined) {
useMultilingualCheckbox.checked = useMultilingualModelState;
languageDropdown.disabled = !useMultilingualModelState;
taskDropdown.disabled = !useMultilingualModelState;
}
});

chrome.storage.local.get("selectedLanguage", ({ selectedLanguage: storedLanguage }) => {
if (storedLanguage !== undefined) {
languageDropdown.value = storedLanguage;
Expand Down Expand Up @@ -86,7 +77,6 @@ document.addEventListener("DOMContentLoaded", function () {
tabId: currentTab.id,
host: host,
port: port,
useMultilingual: useMultilingualCheckbox.checked,
language: selectedLanguage,
task: selectedTask,
modelSize: selectedModelSize
Expand Down Expand Up @@ -129,9 +119,8 @@ document.addEventListener("DOMContentLoaded", function () {
startButton.disabled = isCapturing;
stopButton.disabled = !isCapturing;
useServerCheckbox.disabled = isCapturing;
useMultilingualCheckbox.disabled = isCapturing;
modelSizeDropdown.disabled = isCapturing;

taskDropdown.disabled = isCapturing;
startButton.classList.toggle("disabled", isCapturing);
stopButton.classList.toggle("disabled", !isCapturing);
}
Expand All @@ -142,18 +131,6 @@ document.addEventListener("DOMContentLoaded", function () {
chrome.storage.local.set({ useServerState });
});

useMultilingualCheckbox.addEventListener('change', function() {
const useMultilingualModelState = useMultilingualCheckbox.checked;
if (useMultilingualModelState) {
languageDropdown.disabled = false;
taskDropdown.disabled = false;
} else {
languageDropdown.disabled = true;
taskDropdown.disabled = true;
}
chrome.storage.local.set({ useMultilingualModelState });
});

languageDropdown.addEventListener('change', function() {
if (languageDropdown.value === "") {
selectedLanguage = null;
Expand Down
18 changes: 5 additions & 13 deletions whisper_live/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def __init__(
self,
host=None,
port=None,
is_multilingual=False,
lang=None,
translate=False,
model="small",
Expand All @@ -92,8 +91,7 @@ def __init__(
Args:
host (str): The hostname or IP address of the server.
port (int): The port number for the WebSocket server.
is_multilingual (bool, optional): Specifies if multilingual transcription is enabled. Default is False.
lang (str, optional): The selected language for transcription when multilingual is disabled. Default is None.
lang (str, optional): The selected language for transcription. Default is None.
translate (bool, optional): Specifies if the task is translation. Default is False.
"""
self.chunk = 4096
Expand All @@ -102,14 +100,11 @@ def __init__(
self.rate = 16000
self.record_seconds = 60000
self.recording = False
self.multilingual = False
self.language = None
self.task = "transcribe"
self.uid = str(uuid.uuid4())
self.waiting = False
self.last_response_recieved = None
self.disconnect_if_no_response_for = 15
self.multilingual = is_multilingual
self.language = lang
self.model = model
self.server_error = False
Expand Down Expand Up @@ -247,7 +242,7 @@ def on_open(self, ws):
"""
Callback function called when the WebSocket connection is successfully opened.

Sends an initial configuration message to the server, including client UID, multilingual mode,
Sends an initial configuration message to the server, including client UID,
language selection, task type, and model.

Args:
Expand All @@ -261,7 +256,6 @@ def on_open(self, ws):
json.dumps(
{
"uid": self.uid,
"multilingual": self.multilingual,
"language": self.language,
"task": self.task,
"model": self.model,
Expand Down Expand Up @@ -548,8 +542,7 @@ class TranscriptionClient:
Args:
host (str): The hostname or IP address of the server.
port (int): The port number to connect to on the server.
is_multilingual (bool, optional): Indicates whether the transcription should support multiple languages (default is False).
lang (str, optional): The primary language for transcription (used if `is_multilingual` is False). Default is None, which defaults to English ('en').
lang (str, optional): The primary language for transcription. Defaults to None, in which case English ('en') is used.
translate (bool, optional): Indicates whether translation tasks are required (default is False).

Attributes:
Expand All @@ -558,19 +551,18 @@ class TranscriptionClient:
Example:
To create a TranscriptionClient and start transcription on microphone audio:
```python
transcription_client = TranscriptionClient(host="localhost", port=9090, is_multilingual=True)
transcription_client = TranscriptionClient(host="localhost", port=9090)
transcription_client()
```
"""
def __init__(self,
host,
port,
is_multilingual=False,
lang=None,
translate=False,
model="small",
):
self.client = Client(host, port, is_multilingual, lang, translate, model)
self.client = Client(host, port, lang, translate, model)

def __call__(self, audio=None, hls_url=None):
"""
Expand Down
Loading
Loading