diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cb88c88
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,23 @@
+bk/
+venv/
+test/*.xml
+__pycache__
+*.py[cod]
+*.swp
+
+build/
+develop-eggs/
+dist/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..76a9d2a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,39 @@
+## [CMU Linguistic Annotation Backend](https://github.com/neulab/cmulab/) plugin for [ELAN](https://archive.mpi.nl/tla/elan)
+
+This plugin is still a work in progress. Eventually, it will allow ELAN users to invoke various services (translation, transcription, POS tagging, etc.) provided by the CMU Linguistic Annotation Backend server.
+
+
+### Setup
+
+#### Linux
+
+1. Download the latest version of ELAN from [here](https://archive.mpi.nl/tla/elan/download) and install it:
+```
+wget https://www.mpi.nl/tools/elan/ELAN-XX_linux.tar.gz
+tar xzf ELAN-XX_linux.tar.gz
+```
+
+2. [Download a copy of this repo](https://github.com/zaidsheikh/cmulab_elan_extension/archive/refs/heads/main.zip) and unzip it. Copy the `cmulab_elan_extension-main/` folder into ELAN's extensions dir (`ELAN-XX/lib/app/extensions/`).
+
+#### Mac
+
+1. If ELAN is not already installed on your Mac, [download the latest .dmg installer](https://archive.mpi.nl/tla/elan/download) and install it. It should be installed in the `/Applications/ELAN_XX` directory, where `XX` is the name of the version.
+2. Download this [zip file](https://github.com/zaidsheikh/cmulab_elan_extension/archive/refs/heads/main.zip) and unzip it. You should see a folder named `cmulab_elan_extension-main` containing the contents of this repo.
+3. Right-click `ELAN_XX` and click "Show Package Contents", then copy your `cmulab_elan_extension-main` folder into `ELAN_XX.app/Contents/app/extensions`.
+
+
+#### Windows
+
+1. Download the latest version of ELAN from [here](https://archive.mpi.nl/tla/elan/download) and install it.
+2. [Download a copy of this repo](https://github.com/zaidsheikh/cmulab_elan_extension/archive/refs/heads/main.zip) and unzip it. Copy the `cmulab_elan_extension-main/` folder into ELAN's extensions dir (`ELAN-XX/app/extensions/`).
+3. Install [Python 3](https://www.python.org/downloads/) if it isn't already installed.
+
+
+### Instructions
+
+Start ELAN with the provided test audio file:
+
+`ELAN_6-1/bin/ELAN allosaurus-elan/test/allosaurus.wav &`
+
+Switch to the "Recognizers" tab, select "CMU Linguistic Annotation Backend" from the Recognizer dropdown list at the top, and then click the "Start" button.
+If this is your first time using this plugin, you will be prompted to log in to the [CMULAB backend server](https://github.com/neulab/cmulab) and get an access token (you can create an account or simply log in with an existing Google account).
diff --git a/cmulab_elan_extension.bat b/cmulab_elan_extension.bat
new file mode 100644
index 0000000..6b506b8
--- /dev/null
+++ b/cmulab_elan_extension.bat
@@ -0,0 +1,18 @@
+@echo off
+
+rem Windows launcher for the CMULAB ELAN recognizer: creates the Python
+rem virtual env on first run, then runs cmulab_elan_extension.py inside it.
+
+set PYTHONLEGACYWINDOWSIOENCODING=True
+set PYTHONIOENCODING=:replace
+
+rem Change to the directory containing this script (mirrors what
+rem cmulab_elan_extension.sh does) so the relative venv\ and
+rem requirements.txt paths resolve regardless of the caller's working dir.
+cd /d "%~dp0"
+
+If not exist venv\ (
+    rem Progress lines are unquoted and use a 0-1 fraction, matching the
+    rem "PROGRESS: 0.1 ..." lines printed by cmulab_elan_extension.py.
+    rem (A lone percent sign is consumed by cmd's variable expansion.)
+    echo PROGRESS: 0.01 Initial setup: Creating virtual env, installing dependencies
+    python3 -m venv venv
+    call .\venv\Scripts\activate
+    python3 -m pip --no-input install -r requirements.txt
+    echo PROGRESS: 0.05 One-time initialization successfully completed!
+    call deactivate
+)
+
+echo "Activating venv..."
+call .\venv\Scripts\activate
+python3 .\cmulab_elan_extension.py
+call deactivate
diff --git a/cmulab_elan_extension.cmdi b/cmulab_elan_extension.cmdi
new file mode 100644
index 0000000..252659e
--- /dev/null
+++ b/cmulab_elan_extension.cmdi
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ CMU Linguistic Annotation Backend
+
+ cmulab_elan_extension.html
+
+
+ source
+
+ input_tier
+
+
+ cmulab_service
+
+
+
+
+
+
+
+
+
+
diff --git a/cmulab_elan_extension.html b/cmulab_elan_extension.html
new file mode 100644
index 0000000..33af1fc
--- /dev/null
+++ b/cmulab_elan_extension.html
@@ -0,0 +1,11 @@
+
+
+
+
+ CMU Linguistic Annotation Backend
+
+
+
+
CMU Linguistic Annotation Backend
+
+
diff --git a/cmulab_elan_extension.py b/cmulab_elan_extension.py
new file mode 100755
index 0000000..b7bfa78
--- /dev/null
+++ b/cmulab_elan_extension.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import atexit
+import os
+import os.path
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import unicodedata
+import time
+
+import requests
+import json
+import traceback
+from utils.create_dataset import create_dataset_from_eaf
+
+import PySimpleGUI as sg
+import webbrowser
+
+
+# File in the user's home directory where the CMULAB access token is cached
+# between runs (read and written by get_auth_token).
+AUTH_TOKEN_FILE = os.path.join(os.path.expanduser("~"), ".cmulab_elan")
+# Default CMULAB backend URL; get_server_url() prompts for a different URL
+# when this one cannot be reached.
+# NOTE(review): plain http -- the access token is presumably sent unencrypted; confirm.
+CMULAB_SERVER = "http://miami.lti.cs.cmu.edu:8088"
+
+
+def ping_server(server_url):
+ status_check = None
+ try:
+ status_check = requests.get(server_url.rstrip('/') + "/annotator")
+ except:
+ traceback.print_exc()
+ return status_check
+
+
+def get_server_url():
+ server_url = CMULAB_SERVER
+ status_check = ping_server(server_url)
+ while not status_check:
+ err_msg = "Error connecting to CMULAB server " + server_url
+ layout = [[sg.Text(err_msg + "\nPlease enter new CMULAB server URL")], [sg.Input()], [sg.Button('OK')]]
+ window = sg.Window('CMULAB server URL', layout)
+ event, values = window.read()
+ server_url = values[0].strip().rstrip('/')
+ if not server_url.startswith("http"):
+ server_url = "http://" + server_url
+ window.close()
+ status_check = ping_server(server_url)
+ return server_url
+
+
+def get_params():
+ # The parameters provided by the user via the ELAN recognizer interface
+ # (specified in CMDI).
+ params = {}
+ # Read in all of the parameters that ELAN passes to this local recognizer on
+ # standard input.
+ for line in sys.stdin:
+ match = re.search(r'(.*?)', line)
+ if match:
+ params[match.group(1)] = match.group(2).strip()
+ return params
+
+
+def browser_login(server_url):
+ webbrowser.open(server_url + "/annotator/get_auth_token/")
+
+
+def get_auth_token(server_url):
+ if os.path.exists(AUTH_TOKEN_FILE):
+ with open(AUTH_TOKEN_FILE) as fin:
+ auth_token = fin.read().strip()
+ else:
+ # browser_login(server_url)
+ layout = [[sg.Text('Click link below to get your access token')],
+ [sg.Text(server_url + "/annotator/get_auth_token/", text_color='blue', enable_events=True, key='-LINK-')],
+ [sg.Text("Please enter your access token here")], [sg.Input()], [sg.Button('OK')]]
+ window = sg.Window('Authorization required!', layout, finalize=True)
+ window['-LINK-'].set_cursor(cursor='hand1')
+ while True:
+ event, values = window.read()
+ if event in (sg.WIN_CLOSED, 'Exit'):
+ break
+ elif event == '-LINK-':
+ webbrowser.open(window['-LINK-'].DisplayText)
+ auth_token = values[0].strip()
+ if auth_token:
+ break
+ window.close()
+ with open(AUTH_TOKEN_FILE, 'w') as fout:
+ fout.write(auth_token)
+ return auth_token
+
+
+def get_input_annotations(input_tier):
+ # grab the 'input_tier' parameter, open that
+ # XML document, and read in all of the annotation start times, end times,
+ # and values.
+ # Note: Tiers for the recognizers are in the AVATech tier format, not EAF
+ annotations = []
+ if os.path.exists(input_tier):
+ with open(input_tier, 'r', encoding = 'utf-8') as input_tier_file:
+ for line in input_tier_file:
+ match = re.search(r'(.*?)', line)
+ if match:
+ annotation = { \
+ 'start': int(float(match.group(1)) * 1000.0), \
+ 'end' : int(float(match.group(2)) * 1000.0), \
+ 'value' : match.group(3) }
+ annotations.append(annotation)
+ return annotations
+
+
+
+def phone_transcription(server_url, auth_token, input_audio, annotations):
+ layout = [[sg.Text("Language code"), sg.Input(default_text="eng", key='lang_code')],
+ [sg.Text("Pretrained model"), sg.Input(default_text="eng2102", key='pretrained_model')],
+ [sg.Button('OK')]]
+ window = sg.Window('Allosaurus parameters', layout)
+ event, values = window.read()
+ lang_code = values["lang_code"].strip().lower()
+ pretrained_model = values["pretrained_model"].strip().lower()
+ window.close()
+
+ with open(input_audio,'rb') as audio_file:
+ files = {'file': audio_file}
+ url = server_url + "/annotator/segment/1/annotate/2/"
+ try:
+ headers = {}
+ if auth_token:
+ headers["Authorization"] = auth_token
+ allosaurus_params = {"lang": lang_code, "model": pretrained_model}
+ r = requests.post(url, files=files, data={"segments": json.dumps(annotations), "params": json.dumps(allosaurus_params)}, headers=headers)
+ except:
+ err_msg = "Error connecting to CMULAB server " + server_url
+ sys.stderr.write(err_msg + "\n")
+ traceback.print_exc()
+ sg.Popup(err_msg, title="ERROR")
+ print('RESULT: FAILED.', flush = True)
+ sys.exit(1)
+ print("Response from CMULAB server " + server_url + ": " + r.text)
+ if not r.ok:
+ sg.Popup("Server error, click the report button to view logs.", title="ERROR")
+ print('RESULT: FAILED.', flush = True)
+ sys.exit(1)
+ transcribed_annotations = json.loads(r.text)
+ for annotation in transcribed_annotations:
+ annotation["value"] = annotation["transcription"].replace(' ', '')
+ return transcribed_annotations
+
+
+def finetune_allosaurus(server_url, auth_token, input_audio, annotations):
+ layout = [[sg.Text(err_msg + "\nPlease enter new CMULAB server URL")], [sg.Input()], [sg.Button('OK')]]
+ window = sg.Window('CMULAB server URL', layout)
+ event, values = window.read()
+ server_url = values[0].strip().rstrip('/')
+ window.close()
+
+
+def speaker_diarization(server_url, auth_token, input_audio, annotations):
+ if not annotations:
+ sg.Popup("Please select an input tier containing a few sample annotations for each speaker", title="ERROR")
+ print('RESULT: FAILED.', flush = True)
+ sys.exit(1)
+ layout = [[sg.Text("Threshold"), sg.Slider((0, 1), orientation='h', resolution=0.01, default_value=0.45)],
+ [sg.Button('OK')]]
+ window = sg.Window('Diarization parameters', layout)
+ event, values = window.read()
+ threshold = float(values[0])
+ window.close()
+ print("PROGRESS: 0.5 Running speaker diarization...", flush = True)
+ with open(input_audio,'rb') as audio_file:
+ files = {'file': audio_file}
+ url = server_url + "/annotator/segment/1/annotate/2/"
+ try:
+ headers = {}
+ if auth_token:
+ headers["Authorization"] = auth_token
+ request_params = {"service": "diarization", "threshold": threshold}
+ print(url)
+ print(input_audio)
+ print(json.dumps(annotations, indent=4))
+ print(json.dumps(request_params, indent=4))
+ print(json.dumps(headers, indent=4))
+ r = requests.post(url, files=files,
+ data={"segments": json.dumps(annotations), "params": json.dumps(request_params)},
+ headers=headers)
+ except:
+ err_msg = "Error connecting to CMULAB server " + server_url
+ sys.stderr.write(err_msg + "\n")
+ traceback.print_exc()
+ sg.Popup(err_msg, title="ERROR")
+ print('RESULT: FAILED.', flush = True)
+ sys.exit(1)
+ print("Response from CMULAB server " + server_url + ": " + r.text)
+ if not r.ok:
+ sg.Popup("Server error, click the report button to view logs.", title="ERROR")
+ print('RESULT: FAILED.', flush = True)
+ sys.exit(1)
+ response_data = json.loads(r.text)
+ transcribed_annotations = []
+ for item in response_data:
+ transcribed_annotations.append({
+ "start": item[1],
+ "end": item[2],
+ "value": item[0]
+ })
+ return transcribed_annotations
+
+
+def write_output(output_tier_file, annotations):
+ with open(output_tier_file, 'w', encoding = 'utf-8') as output_tier:
+ # Write document header.
+ output_tier.write('\n')
+ output_tier.write('\n')
+ for annotation in annotations:
+ output_tier.write(' %s\n' %
+ (annotation['start'], annotation['end'], annotation['value']))
+ output_tier.write('\n')
+
+
+def main():
+ params = get_params()
+
+ input_audio = params.get('source')
+ input_tier = params.get('input_tier', 'none specified')
+ output_tier = params.get('output_tier')
+ cmulab_service = params.get('cmulab_service', 'Phone-transcription')
+ print("input_tier: " + input_tier)
+ print("cmulab_service: " + cmulab_service)
+
+ server_url = get_server_url()
+
+ auth_token = get_auth_token(server_url)
+
+ print("PROGRESS: 0.1 Loading annotations from input tier", flush = True)
+ annotations = get_input_annotations(input_tier)
+
+ if cmulab_service == "Phone-transcription":
+ output_annotations = phone_transcription(server_url, auth_token, input_audio, annotations)
+ elif cmulab_service == "Finetune-allosaurus":
+ output_annotations = finetune_allosaurus(server_url, auth_token, input_audio, annotations)
+ elif cmulab_service == "Speaker-diarization":
+ output_annotations = speaker_diarization(server_url, auth_token, input_audio, annotations)
+ else:
+ print("RESULT: FAILED. Not supported!", flush = True)
+ sys.exit(1)
+
+ print("PROGRESS: 0.95 Preparing output tier", flush = True)
+ write_output(output_tier, output_annotations)
+ print('RESULT: DONE.', flush = True)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/cmulab_elan_extension.sh b/cmulab_elan_extension.sh
new file mode 100755
index 0000000..6db3aa2
--- /dev/null
+++ b/cmulab_elan_extension.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Launcher for the CMULAB ELAN recognizer: creates the Python virtual env on
+# first run, then runs cmulab_elan_extension.py inside it.
+#
+# It seems that recognizer processes invoked by ELAN don't inherit any regular
+# environmental variables (like PATH), which makes it difficult to track down
+# where both Python and ffmpeg(1) might be. These same processes also have
+# their locale set to C. This implies a default ASCII file encoding.
+
+export LC_ALL="en_US.UTF-8"
+export PYTHONIOENCODING="utf-8"
+
+# change to cmulab_elan_extension dir; stop if that fails, since every path
+# below is relative to it
+cd "$(dirname "$0")" || { echo "RESULT: FAILED."; exit 1; }
+
+if [ ! -d "venv" ]; then
+    # Progress is reported as a 0-1 fraction, matching the "PROGRESS: 0.1 ..."
+    # lines printed by cmulab_elan_extension.py.
+    echo "PROGRESS: 0.01 (Initial setup) Creating virtual env, installing dependencies"
+    if python3 -m venv venv \
+        && source venv/bin/activate \
+        && python3 -m pip --no-input install -r requirements.txt; then
+        echo "PROGRESS: 0.05 One-time initialization successfully completed!"
+        deactivate
+    else
+        # Remove the half-built venv so the next run retries the setup instead
+        # of skipping it because the directory already exists.
+        rm -rf venv
+        echo "RESULT: FAILED."
+        exit 1
+    fi
+fi
+
+source venv/bin/activate
+python3 ./cmulab_elan_extension.py
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2fd8a96
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+requests
+pydub
+pympi-ling
+PySimpleGUI
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/create_dataset.py b/utils/create_dataset.py
new file mode 100644
index 0000000..f35e1fa
--- /dev/null
+++ b/utils/create_dataset.py
@@ -0,0 +1,32 @@
+import argparse
+import pympi
+import pydub
+from pathlib import Path
+
+
+def create_dataset_from_eaf(eaf_file, output_dir, tier_name="Allosaurus"):
+ print(eaf_file)
+ print(output_dir)
+ print(tier_name)
+ output_dir_path = Path(output_dir)
+ output_dir_path.mkdir(parents=True, exist_ok=True)
+ input_elan = pympi.Elan.Eaf(file_path=eaf_file)
+ audio_file_path = input_elan.media_descriptors[0]["MEDIA_URL"][len("file://"):]
+ full_audio = pydub.AudioSegment.from_file(audio_file_path, format = 'wav')
+ for segment_id in input_elan.tiers[tier_name][0]:
+ start_id, end_id, transcription, _ = input_elan.tiers[tier_name][0][segment_id]
+ start = input_elan.timeslots[start_id]
+ end = input_elan.timeslots[end_id]
+ clip = full_audio[start:end]
+ clip.export(output_dir_path / (segment_id + ".wav"), format = 'wav')
+ (output_dir_path / (segment_id + ".txt")).write_text(transcription)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="convert EAF file to dataset required for fine-tuning allosaurus")
+ parser.add_argument('eaf_file', type=str, help="EAF file with phone transcriptions")
+ parser.add_argument('output_dir', type=str, help="output dir")
+ parser.add_argument('--tier', type=str, default="Allosaurus", help="Tier containing phone transcriptions")
+ args = parser.parse_args()
+ create_dataset_from_eaf(args.eaf_file, args.output_dir, args.tier)
diff --git a/utils/requirements.txt b/utils/requirements.txt
new file mode 100644
index 0000000..1e982ec
--- /dev/null
+++ b/utils/requirements.txt
@@ -0,0 +1,2 @@
+pydub
+pympi-ling