diff --git a/README.md b/README.md index d52ef9a4..633fdb3a 100644 --- a/README.md +++ b/README.md @@ -86,9 +86,14 @@ client("tests/jfk.wav") client() ``` +- TO transcribe from a RTSP stream: +```python +client(rtsp_url="rtsp://admin:admin@192.168.0.1/rtsp") +``` + - To transcribe from a HLS stream: ```python -client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8") +client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8") ``` ## Browser Extensions diff --git a/whisper_live/client.py b/whisper_live/client.py index 595cb57e..5a8eaac6 100644 --- a/whisper_live/client.py +++ b/whisper_live/client.py @@ -295,7 +295,7 @@ def __init__(self, clients): print(f"[WARN]: Unable to access microphone. {error}") self.stream = None - def __call__(self, audio=None, hls_url=None, save_file=None): + def __call__(self, audio=None, rtsp_url=None, hls_url=None, save_file=None): """ Start the transcription process. @@ -307,6 +307,10 @@ def __call__(self, audio=None, hls_url=None, save_file=None): audio (str, optional): Path to an audio file for transcription. Default is None, which triggers live recording. """ + assert sum( + source is not None for source in [audio, rtsp_url, hls_url] + ) <= 1, 'You must provide only one selected source' + print("[INFO]: Waiting for server ready ...") for client in self.clients: while not client.recording: @@ -320,6 +324,8 @@ def __call__(self, audio=None, hls_url=None, save_file=None): elif audio is not None: resampled_file = utils.resample(audio) self.play_file(resampled_file) + elif rtsp_url is not None: + self.process_rtsp_stream(rtsp_url) else: self.record() @@ -398,6 +404,16 @@ def play_file(self, filename): self.write_all_clients_srt() print("[INFO]: Keyboard interrupt.") + def process_rtsp_stream(self, rtsp_url): + """ + Connect to an RTSP source, process the audio stream, and send it for trascription. + + Args: + rtsp_url (str): The URL of the RTSP stream source. + """ + process = self.get_rtsp_ffmpeg_process(rtsp_url) + self.handle_ffmpeg_process(process, stream_type='RTSP') + def process_hls_stream(self, hls_url, save_file): """ Connect to an HLS source, process the audio stream, and send it for transcription. @@ -406,27 +422,12 @@ def process_hls_stream(self, hls_url, save_file): hls_url (str): The URL of the HLS stream source. save_file (str, optional): Local path to save the network stream. """ - print("[INFO]: Connecting to HLS stream...") - process = None # Initialize process to None + process = self.get_hls_ffmpeg_process(hls_url, save_file) + self.handle_ffmpeg_process(process, stream_type='HLS') + def handle_ffmpeg_process(self, process, stream_type): + print(f"[INFO]: Connecting to {stream_type} stream...") try: - # Connecting to the HLS stream using ffmpeg-python - if save_file is None: - process = ( - ffmpeg - .input(hls_url, threads=0) - .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate) - .run_async(pipe_stdout=True, pipe_stderr=True) - ) - else: - input = ffmpeg.input(hls_url, threads=0) - output_file = input.output(save_file, acodec='copy', vcodec='copy').global_args('-loglevel', 'quiet') - output_std = input.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate) - process = ( - ffmpeg.merge_outputs(output_file, output_std) - .run_async(pipe_stdout=True, pipe_stderr=True) - ) - # Process the stream while True: in_bytes = process.stdout.read(self.chunk * 2) # 2 bytes per sample @@ -436,14 +437,41 @@ def process_hls_stream(self, hls_url, save_file): self.multicast_packet(audio_array.tobytes()) except Exception as e: - print(f"[ERROR]: Failed to connect to HLS stream: {e}") + print(f"[ERROR]: Failed to connect to {stream_type} stream: {e}") finally: - self.close_all_clients() + self.close_all_clients() self.write_all_clients_srt() if process: process.kill() - print("[INFO]: HLS stream processing finished.") + print(f"[INFO]: {stream_type} stream processing finished.") + + def get_rtsp_ffmpeg_process(self, rtsp_url): + return ( + ffmpeg + .input(rtsp_url, threads=0) + .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate) + .run_async(pipe_stdout=True, pipe_stderr=True) + ) + + def get_hls_ffmpeg_process(self, hls_url, save_file): + if save_file is None: + process = ( + ffmpeg + .input(hls_url, threads=0) + .output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate) + .run_async(pipe_stdout=True, pipe_stderr=True) + ) + else: + input = ffmpeg.input(hls_url, threads=0) + output_file = input.output(save_file, acodec='copy', vcodec='copy').global_args('-loglevel', 'quiet') + output_std = input.output('-', format='s16le', acodec='pcm_s16le', ac=1, ar=self.rate) + process = ( + ffmpeg.merge_outputs(output_file, output_std) + .run_async(pipe_stdout=True, pipe_stderr=True) + ) + + return process def record(self, out_file="output_recording.wav"): """